From a6f202275ec093b9f8948d77b9783f0820f930d8 Mon Sep 17 00:00:00 2001
From: dotdotisdead <dotdotisdead@a3eca27d-f21b-0410-9b4a-6511e771f64e>
Date: Sun, 27 Aug 2006 18:39:38 +0000
Subject: Source snapshot... broken.

---
 xdelta3/.gdb_history               |  256 ++
 xdelta3/.xdelta3.prcs_aux          |   84 +
 xdelta3/COPYING                    |  340 ++
 xdelta3/ChangeLog                  |    6 +
 xdelta3/Makefile                   |  111 +
 xdelta3/RELEASE.NOTES              |    3 +
 xdelta3/badcopy.c                  |  111 +
 xdelta3/draft-korn-vcdiff.txt      | 1322 ++++++++
 xdelta3/junk.py                    |   11 +
 xdelta3/linkxd3lib.c               |   47 +
 xdelta3/rcs_junk.cc                | 1861 +++++++++++
 xdelta3/setup.py                   |   33 +
 xdelta3/show.c                     |   41 +
 xdelta3/testh.c                    |    1 +
 xdelta3/www/xdelta3-api-guide.html |  212 ++
 xdelta3/www/xdelta3-cmdline.html   |  166 +
 xdelta3/www/xdelta3.css            |   69 +
 xdelta3/www/xdelta3.html           |   89 +
 xdelta3/xdelta3-cfgs.h             |  118 +
 xdelta3/xdelta3-djw.h              | 1917 ++++++++++++
 xdelta3/xdelta3-fgk.h              |  851 +++++
 xdelta3/xdelta3-list.h             |  130 +
 xdelta3/xdelta3-main.h             | 2923 +++++++++++++++++
 xdelta3/xdelta3-python.h           |   86 +
 xdelta3/xdelta3-regtest.py         |  596 ++++
 xdelta3/xdelta3-second.h           |  363 +++
 xdelta3/xdelta3-test.h             | 2229 +++++++++++++
 xdelta3/xdelta3.c                  | 6022 ++++++++++++++++++++++++++++++++++++
 xdelta3/xdelta3.h                  | 1029 ++++++
 xdelta3/xdelta3.prj                |  133 +
 30 files changed, 21160 insertions(+)
 create mode 100755 xdelta3/.gdb_history
 create mode 100755 xdelta3/.xdelta3.prcs_aux
 create mode 100755 xdelta3/COPYING
 create mode 100755 xdelta3/ChangeLog
 create mode 100755 xdelta3/Makefile
 create mode 100755 xdelta3/RELEASE.NOTES
 create mode 100755 xdelta3/badcopy.c
 create mode 100755 xdelta3/draft-korn-vcdiff.txt
 create mode 100755 xdelta3/junk.py
 create mode 100755 xdelta3/linkxd3lib.c
 create mode 100755 xdelta3/rcs_junk.cc
 create mode 100755 xdelta3/setup.py
 create mode 100755 xdelta3/show.c
 create mode 100755 xdelta3/testh.c
 create mode 100755 xdelta3/www/xdelta3-api-guide.html
 create mode 100755 xdelta3/www/xdelta3-cmdline.html
 create mode 100755 xdelta3/www/xdelta3.css
 create mode 100755 xdelta3/www/xdelta3.html
 create mode 100755 xdelta3/xdelta3-cfgs.h
 create mode 100755 xdelta3/xdelta3-djw.h
 create mode 100755 xdelta3/xdelta3-fgk.h
 create mode 100755 xdelta3/xdelta3-list.h
 create mode 100755 xdelta3/xdelta3-main.h
 create mode 100755 xdelta3/xdelta3-python.h
 create mode 100755 xdelta3/xdelta3-regtest.py
 create mode 100755 xdelta3/xdelta3-second.h
 create mode 100755 xdelta3/xdelta3-test.h
 create mode 100755 xdelta3/xdelta3.c
 create mode 100755 xdelta3/xdelta3.h
 create mode 100755 xdelta3/xdelta3.prj

(limited to 'xdelta3')

diff --git a/xdelta3/.gdb_history b/xdelta3/.gdb_history
new file mode 100755
index 0000000..72410e8
--- /dev/null
+++ b/xdelta3/.gdb_history
@@ -0,0 +1,256 @@
+run test
+i[
+up
+print tpos
+print recon_size
+break xdelta3-test.h:2323
+run
+s
+s
+n
+print input_size
+up
+up
+print delta
+print delta_size
+run -vv -f -s ~/Desktop/hello.c ~/Desktop/world.c hw
+up
+down
+break xdelta3-main.h:2252
+run
+s
+n
+c
+c
+c
+run -vv -f -s ~/Desktop/hello.c ~/Desktop/world.c hw
+run -vv -f -s testcase/6/source testcase/6/target
+break xdelta3.c:5792
+run
+up
+updown
+break xdelta3.c:3837
+run
+s
+n
+print matchoff
+print streamoff
+print tryblk
+print tryoff
+n
+n
+n
+print stream->match_maxfwd
+print stream->match_fwd
+print str->cublk
+print str->curblk
+print src->curblk
+print tryoff
+n
+n
+print tryoff
+n
+print tryoff
+print src->curblk[21]
+print stream->next_in[21]
+print src->curblk
+print stream->next_in
+break xdelta3.c:5726
+c
+n
+print stream->match_fwd
+n
+n
+n
+n
+n
+n
+s
+n
+n
+n
+n
+n
+n
+n
+step 1
+step 1
+run -s testcase/3/source.doc testcase/3/target.doc
+break xdelta3.c:2697
+run
+p blkno
+p source->blocks
+up
+run -s testcase/3/source.doc testcase/3/target.doc out
+run -s testcase/3/source.doc testcase/3/target.doc > /dev/null
+break xdelta3.c:5095
+run
+p logical_input_cksum_pos
+p stream->input_pos
+p stream->input_position
+p stream->srcwin_size
+p stream->total_in
+n
+p logical_input_cksum_pos
+p stream-srcwin_cksum_pos
+p stream->srcwin_cksum_pos
+n
+n
+n
+p stream->srcwin_size
+n
+n
+p blkno
+p blkoff
+p onblk
+n
+n
+n
+break xdelta3.c:5114
+c
+n
+k
+y
+step 1
+d
+break xdelta3.c:5103
+break xdelta3.c:5097
+run
+n
+p logical_input_cksum_pos
+p stream->srcwin_size
+c
+n
+p stream->srcwin_cksum_pos
+run
+n
+c
+fin
+up
+down
+c
+up
+break xdelta3.c:5131
+c
+n
+p diff
+p onblk
+n
+p onblk
+p blkoff
+p blkoff
+p stream->large_look
+k
+y
+c
+k
+break xdelta3.c:5103
+run -s testcase/3/source.doc testcase/3/target.doc
+n
+o ibbkj
+p onblk
+n
+p blkoff
+p onblk
+n
+p blkoff
+break xdelta3.c:5119
+c
+n
+p stream->input_position
+p stream->srcwin_cksum_pos
+p stream->stream->srcwin_size
+p stream->srcwin_size
+p logical_input_cksum_pos
+p *next_move_point
+c
+n
+p stream->input_position
+p logical_input_cksum_pos
+p logical_input_cksum_pos
+p stream->srcwin_cksum_pos
+d
+c
+run -s testcase/3/source.doc testcase/3/target.doc -o /tmp/foo12
+run -s testcase/3/source.doc testcase/3/target.doc >  /dev/null
+run -vv -s testcase/3/source.doc testcase/3/target.doc >  /dev/null
+k
+y
+run -vv -s testcase/3/source.doc testcase/3/target.doc
+run -vv -s testcase/3/source.doc testcase/3/target.doc
+run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
+up
+n
+fin
+n
+break xdelta3.c:5119
+c
+c
+p stream->srcwin_cksum_pos
+p logical_input_cksum_pos
+p stream->total_iun
+p stream->total_i
+p stream->total_in
+p stream->srcwin_cksum_pos
+k
+run -vv -s testcase/3/source.doc testcase/3/target.doc  /tmp/fdsfd
+kill
+run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
+run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
+y
+run
+run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
+run
+run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
+n
+fin
+n
+p blkoff
+p stream->large_look
+p onblk
+break xdelta3.c:5122
+c
+n
+p blkno
+p stream->srcwin_cksum_pos
+up
+down
+p blkno
+p blkno * (1 <<18)
+p stream->srcwin_cksum_pos
+c
+p blkno * (1 <<18)
+p blkno
+c
+n
+n
+p onblk
+n
+p onblk
+p diff
+p stream->srcwin_cksum_pos
+n
+c
+n
+p stream->srcwin_cksum_pos
+p blkno
+c
+n
+p blkno
+c
+n
+c
+n
+p stream->srcwin_cksum_pos
+p logical_input_cksum_pos
+n
+run -vv -s testcase/3/source.doc testcase/3/target.doc
+run -vv -s testcase/3/source.doc testcase/3/target.doc > /dev/null
+up
+break xdelta3.c:5123
+c
+break xdelta3.c:5097
+c
+p stream->srcwin_cksum_pos
+p stream->logical_input_pos
+p logical_input_cksum_pos
diff --git a/xdelta3/.xdelta3.prcs_aux b/xdelta3/.xdelta3.prcs_aux
new file mode 100755
index 0000000..b30689c
--- /dev/null
+++ b/xdelta3/.xdelta3.prcs_aux
@@ -0,0 +1,84 @@
+;; This file is automatically generated, editing may cause PRCS to do
+;; REALLY bad things.
+(Created-By-Prcs-Version 1 3 3)
+(www/xdelta3.css 938 1085949140 b/26_xdelta3.cs 1.3)
+(analyze_pfx.py 1422 1022037044 12_analyze_pf 1.1)
+(badcopy.c 2622 1047759845 20_badcopy.c 1.1)
+(analyze_clen.py 1342 1021753567 14_analyze_cl 1.1)
+(save.regtest.bug1/input.4 19022 1055471779 b/16_input.4 1.1)
+(save.regtest.bug11/input.0 4 1055554284 b/8_input.0 1.1)
+(save.regtest.bug8/input.0 203756 1055518432 28_input.0 1.1)
+(save.regtest.bug1/input.5 21597 1055471779 b/17_input.5 1.1)
+(save.regtest.bug6/input.20 1235 1055474005 39_input.20 1.1)
+(save.regtest.bug11/input.1 10 1055554284 b/7_input.1 1.1)
+(xdelta3-list.h 9892 1052598762 6_xdelta3-li 1.1)
+(save.regtest.bug8/input.1 203756 1055518493 29_input.1 1.1)
+(save.regtest.bug7/recon 51200 1055515262 36_recon 1.1)
+(save.regtest.bug6/input.21 952 1055474005 38_input.21 1.1)
+(save.regtest.bug6/recon 952 1055480638 37_recon 1.1)
+(save.regtest.bug11/recon.x 10 1055554520 b/5_recon.x 1.1)
+(vcdiff.ps 131548 1014968851 b/19_vcdiff.ps 1.1)
+(www/Xdelta3.html 3200 1058668417 b/24_Xdelta3.ht 1.1)
+(priorities.txt 339 1057496665 b/18_priorities 1.1)
+(draft-korn-vcdiff.txt 60706 1018424758 b/22_draft-korn 1.1)
+(dead.code 72096 1085893991 b/21_dead.code 1.2)
+(linkxd3lib.c 1113 1056324075 19_linkxd3lib 1.1)
+(www/xdelta3.html 4708 1085952599 b/24_Xdelta3.ht 1.4)
+(xdelta3-second.h 8228 1057405215 3_xdelta3-se 1.1)
+(testh.c 21 1042671351 17_testh.c 1.1)
+(save.regtest.bug6/input.0 920 1055474005 40_input.0 1.1)
+(save.regtest.bug12/output.x 705 1055556257 b/1_output.x 1.1)
+(save.regtest.bug8/core 1159168 1055529025 23_core 1.1)
+(save.regtest.bug9/foo,v 123233 1055532021 21_foo,vx 1.1)
+(xdelta3.c 201721 1085893369 16_xdelta3.c 1.3)
+(xdelta3-cfgs.h 2701 1057695639 9_xdelta3-cf 1.1)
+(save.regtest.bug10/input.0 53274 1055532189 b/14_input.0 1.1)
+(xdelta3-regtest.py 17976 1085947234 10_xdelta3-re 1.3)
+(save.regtest.bug10/input.1 74663 1055532189 b/13_input.1 1.1)
+(save.regtest.bug4/input.0 7571 1055461840 45_input.0 1.1)
+(save.regtest.bug12/xd3regtest.27181/output 2336 1055566927 b/0_output 1.1)
+(save.regtest.bug4/input.1 11312 1055461840 44_input.1 1.1)
+(save.regtest.bug7/core 1146880 1055522004 30_core 1.1)
+(xdelta3-main.h 79350 1085950532 5_xdelta3-ma 1.3)
+(xdelta3.h 41796 1084138546 1_xdelta3.h 1.2)
+(rcs_junk.cc 36315 1055086755 15_rcs_junk.c 1.1)
+(www/xdelta3-cmdline.html 5234 1085953288 b/25_xdelta3-cm 1.2)
+(save.regtest.bug7/input.0 7571 1055515262 34_input.0 1.1)
+(save.regtest.bug7/output 14276 1055515262 35_output 1.1)
+(save.regtest.bug2/input.0 2296 1055471815 49_input.0 1.1)
+(save.regtest.bug7/input.1 11312 1055515262 33_input.1 1.1)
+(www/xdelta3-api-guide.html 7553 1085953324 b/23_Xdelta3-ap 1.4)
+(save.regtest.bug8/input.0.xz 70595 1055518500 27_input.0.xz 1.1)
+(save.regtest.bug2/input.1 2521 1055471815 48_input.1 1.1)
+(save.regtest.bug9/foo2,v 123233 1055532069 22_foo2,vx 1.1)
+(xdelta3-python.h 1466 1055671733 4_xdelta3-py 1.1)
+(save.regtest.bug10/output 48388 1055532189 b/15_output 1.1)
+(save.regtest.bug10/output.x 48408 1055533319 b/12_output.x 1.1)
+(save.regtest.bug7/recon.x 11312 1055517553 31_recon.x 1.1)
+(save.regtest.bug11/recon 10 1055554284 b/10_recon 1.1)
+(save.regtest.bug8/input.1.xz 89734 1055518503 26_input.1.xz 1.1)
+(xdelta3-test.h 71959 1084138350 2_xdelta3-te 1.2)
+(save.regtest.bug10/recon.x 74655 1055533328 b/11_recon.x 1.1)
+(save.regtest.bug12/input.0 280 1055555649 b/3_input.0 1.1)
+(save.regtest.bug12/input.1 1155 1055555649 b/2_input.1 1.1)
+(draft-vcdiff-huffman.txt 2935 1021721074 b/20_draft-vcdi 1.1)
+(save.regtest.bug8/output.x.right 48221 1055520912 24_output.x.r 1.1)
+(save.regtest.bug5/input.0 7571 1055471668 43_input.0 1.1)
+(save.regtest.bug11/output.x 45 1055554519 b/6_output.x 1.1)
+(save.regtest.bug12/xd3regtest.27181/input.0 2521 1055566927 51_input.0 1.1)
+(save.regtest.bug5/input.1 11312 1055471668 42_input.1 1.1)
+(save.regtest.bug12/xd3regtest.27181/input.1 2296 1055566927 50_input.1 1.1)
+(setup.py 626 1055562104 11_setup.py 1.1)
+(Makefile 3840 1085893399 0_Makefile 1.3)
+(save.regtest.bug3/input.0 732 1055471934 47_input.0 1.1)
+(save.regtest.bug6/output 69 1055480559 41_output 1.1)
+(xdelta3-fgk.h 21496 1057610026 7_xdelta3-fg 1.1)
+(save.regtest.bug3/input.1 271 1055471934 46_input.1 1.1)
+(www/Xdelta3-api.html 6128 1058674572 b/23_Xdelta3-ap 1.1)
+(save.regtest.bug12/output 39 1055555649 b/4_output 1.1)
+(analyze_gp.py 7442 1022750342 13_analyze_gp 1.1)
+(save.regtest.bug11/output 45 1055554284 b/9_output 1.1)
+(save.regtest.bug7/output.x 14296 1055517870 32_output.x 1.1)
+(save.regtest.bug8/output.x 48225 1055530557 25_output.x 1.1)
+(show.c 647 1043318861 18_show.c 1.1)
+(xdelta3-djw.h 51152 1057610015 8_xdelta3-dj 1.1)
diff --git a/xdelta3/COPYING b/xdelta3/COPYING
new file mode 100755
index 0000000..5b6e7c6
--- /dev/null
+++ b/xdelta3/COPYING
@@ -0,0 +1,340 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/xdelta3/ChangeLog b/xdelta3/ChangeLog
new file mode 100755
index 0000000..d7bd042
--- /dev/null
+++ b/xdelta3/ChangeLog
@@ -0,0 +1,6 @@
+2006-07-02  Joshua MacDonald  <jmacd@google.com>
+
+	* xdelta3.c (xd3_iopt_flush_instructions): Fixed a bug in which flush_instructions
+	clears more than half of the instructions and thus encodes the last two, which
+	may still overlap.
+
diff --git a/xdelta3/Makefile b/xdelta3/Makefile
new file mode 100755
index 0000000..0ef4722
--- /dev/null
+++ b/xdelta3/Makefile
@@ -0,0 +1,111 @@
+## Builds the xdelta3 program variants, object files and Python module listed in TARGETS.
+PYTHON = python
+PYTGT = build/temp.linux-i686-2.3/xdelta3.so
+
+TARGETS = xdelta3 xdelta3-64 xdelta3-everything \
+	  xdelta3-Opg xdelta3-64-O xdelta3-Op xdelta3-O \
+	  xdelta3-decoder xdelta3-decoder-nomain.o \
+	  $(PYTGT) \
+	  xdelta3-nosec.o xdelta3-all.o xdelta3-fgk.o xdelta3-djw.o \
+	  xdelta3-noext xdelta3-tools xdelta3-tune \
+	  xdelta3-notools
+# SOURCES is the prerequisite list shared by the build rules; only xdelta3.c is passed to the compiler.
+SOURCES = xdelta3.c xdelta3.h xdelta3-fgk.h xdelta3-djw.h xdelta3-list.h xdelta3-test.h \
+	  xdelta3-main.h xdelta3-cfgs.h xdelta3-second.h xdelta3-python.h
+
+PYFILES = xdelta3-regtest.py setup.py
+
+EXTRA = Makefile COPYING linkxd3lib.c badcopy.c www RELEASE.NOTES
+# REL is the release tag; RELDIR names the directory and archive produced by the "tar" rule.
+REL = 0f
+RELDIR = xdelta3$(REL)
+
+all: $(TARGETS)
+.PHONY: all tar clean  # action names, not files: run them even if a file of that name exists
+tar:  # Pack SOURCES, PYFILES and EXTRA into ./$(RELDIR).tar.gz under a top-level $(RELDIR)/ directory.
+	tar -czf /tmp/$(RELDIR)-tmp.tar.gz $(SOURCES) $(PYFILES) $(EXTRA)
+	rm -rf /tmp/$(RELDIR)
+	mkdir /tmp/$(RELDIR)
+	(cd /tmp/$(RELDIR) && tar -xzf ../$(RELDIR)-tmp.tar.gz)
+	tar -czf ./$(RELDIR).tar.gz -C /tmp $(RELDIR)
+	+tar -tzf ./$(RELDIR).tar.gz
+	rm -rf /tmp/$(RELDIR)
+
+clean:  # Delete every built target plus xdtest.*, core and *.flc leftovers.
+	rm -f $(TARGETS) xdtest.* core *.flc
+
+$(PYTGT): $(SOURCES)  # Python extension module, built and installed by setup.py
+	$(PYTHON) setup.py install --compile --force
+
+xdelta3: $(SOURCES)  # unoptimized debug build (XD3_DEBUG=2) with REGRESSION_TEST=1
+	$(CC) -g -Wall -Wshadow xdelta3.c -o xdelta3 -DXD3_MAIN=1 -DGENERIC_ENCODE_TABLES=1 \
+		-DXD3_USE_LARGEFILE64=1 -DREGRESSION_TEST=1 -DXD3_DEBUG=2 -DSECONDARY_DJW=1 -lm
+
+xdelta3-decoder: $(SOURCES)  # stripped -O2 program with XD3_ENCODER=0 and the optional feature macros set to 0
+	$(CC) -O2 -Wall -Wshadow xdelta3.c \
+	    -DXD3_ENCODER=0 -DXD3_MAIN=1 -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 \
+	    -DXD3_POSIX=0 -DEXTERNAL_COMPRESSION=0 -DVCDIFF_TOOLS=0 \
+	    -o xdelta3-decoder
+	strip xdelta3-decoder
+
+xdelta3-decoder-nomain.o: $(SOURCES) linkxd3lib.c  # NOTE(review): no -c, so this links xdelta3.c with linkxd3lib.c despite the .o name -- confirm intended
+	$(CC) -O2 -Wall -Wshadow xdelta3.c linkxd3lib.c \
+	    -DXD3_ENCODER=0 -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 \
+	    -o xdelta3-decoder-nomain.o
+	strip xdelta3-decoder-nomain.o
+
+xdelta3-O: $(SOURCES)  # -O2 build with REGRESSION_TEST=1
+	$(CC) -g -O2 -Wall -Wshadow xdelta3.c -o xdelta3-O -DXD3_MAIN=1 -DSECONDARY_DJW=1 -DREGRESSION_TEST=1 -lm
+
+xdelta3-O++: $(SOURCES)  # same flags as xdelta3-O but compiled with $(CXX); not listed in TARGETS
+	$(CXX) -g -O2 -Wall -Wshadow xdelta3.c -o xdelta3-O++ -DXD3_MAIN=1 -DSECONDARY_DJW=1 -DREGRESSION_TEST=1 -lm
+
+xdelta3-Op: $(SOURCES)  # -O2 build with XD3_POSIX=1
+	$(CC) -g -O2 -Wall -Wshadow xdelta3.c -o xdelta3-Op -DXD3_POSIX=1 -DXD3_MAIN=1 -DREGRESSION_TEST=1 -lm
+
+xdelta3-64: $(SOURCES)  # unoptimized build with XD3_USE_LARGEFILE64=1 and XD3_DEBUG=0
+	$(CC) -g -Wall -Wshadow xdelta3.c -o xdelta3-64 -DXD3_POSIX=1 -DXD3_MAIN=1 -DREGRESSION_TEST=1 \
+					-DXD3_DEBUG=0 -DXD3_USE_LARGEFILE64=1 -lm
+
+xdelta3-64-O: $(SOURCES)  # -O2 build with XD3_USE_LARGEFILE64=1, without REGRESSION_TEST
+	$(CC) -O2 -Wall -Wshadow xdelta3.c -o xdelta3-64-O -DXD3_POSIX=1 -DXD3_MAIN=1 \
+					-DXD3_USE_LARGEFILE64=1 -lm
+
+xdelta3-everything: $(SOURCES)  # all optional feature macros below set to 1, XD3_DEBUG=1
+	$(CC) -g -Wall -Wshadow xdelta3.c -o xdelta3-everything \
+					-DXD3_MAIN=1 -DVCDIFF_TOOLS=1 -DREGRESSION_TEST=1 \
+					-DSECONDARY_FGK=1 -DSECONDARY_DJW=1 \
+					-DGENERIC_ENCODE_TABLES=1 \
+					-DGENERIC_ENCODE_TABLES_COMPUTE=1 \
+					-DXD3_POSIX=1 \
+					-DEXTERNAL_COMPRESSION=1 \
+					-DXD3_DEBUG=1 -lm
+
+xdelta3-tune: $(SOURCES)  # -O2 build with SECONDARY_FGK, SECONDARY_DJW and TUNE_HUFFMAN set to 1
+	$(CC) -O2 -Wall -Wshadow xdelta3.c -o xdelta3-tune -DXD3_MAIN=1 \
+		-DSECONDARY_FGK=1 -DSECONDARY_DJW=1 -DTUNE_HUFFMAN=1
+
+xdelta3-Opg: $(SOURCES)  # -O3 build compiled with -pg (profiling instrumentation)
+	$(CC) -pg -g -O3 -Wall -Wshadow xdelta3.c -o xdelta3-Opg -DXD3_MAIN=1 \
+		-DSECONDARY_DJW=1 -DXD3_POSIX=1 -DXD3_USE_LARGEFILE64=1
+
+xdelta3-nosec.o: $(SOURCES)  # object file with SECONDARY_FGK=0 and SECONDARY_DJW=0
+	$(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 -o xdelta3-nosec.o
+
+xdelta3-all.o: $(SOURCES)  # object file with SECONDARY_FGK=1 and SECONDARY_DJW=1
+	$(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=1 -DSECONDARY_DJW=1 -o xdelta3-all.o
+
+xdelta3-fgk.o: $(SOURCES)  # object file with only SECONDARY_FGK=1
+	$(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=1 -DSECONDARY_DJW=0 -o xdelta3-fgk.o
+
+xdelta3-djw.o: $(SOURCES)  # object file with only SECONDARY_DJW=1
+	$(CC) -O2 -Wall -Wshadow -c xdelta3.c -DSECONDARY_FGK=0 -DSECONDARY_DJW=1 -o xdelta3-djw.o
+
+xdelta3-noext: $(SOURCES)  # -O2 program with EXTERNAL_COMPRESSION=0
+	$(CC) -O2 -Wall -Wshadow xdelta3.c -DXD3_MAIN=1 -DEXTERNAL_COMPRESSION=0 -o xdelta3-noext
+
+xdelta3-tools: $(SOURCES)  # -O2 program with only XD3_MAIN=1 given; other macros keep their defaults
+	$(CC) -O2 -Wall -Wshadow xdelta3.c -DXD3_MAIN=1 -o xdelta3-tools
+
+xdelta3-notools: $(SOURCES)  # -O2 program with VCDIFF_TOOLS=0
+	$(CC) -O2 -Wall -Wshadow xdelta3.c -DXD3_MAIN=1 -DVCDIFF_TOOLS=0 -o xdelta3-notools
diff --git a/xdelta3/RELEASE.NOTES b/xdelta3/RELEASE.NOTES
new file mode 100755
index 0000000..a4af327
--- /dev/null
+++ b/xdelta3/RELEASE.NOTES
@@ -0,0 +1,3 @@
+2006-05-13  Joshua MacDonald  <joshua.macdonald@gmail.com>
+
+	* xdelta 3.0e: Performance and bug fixes.
diff --git a/xdelta3/badcopy.c b/xdelta3/badcopy.c
new file mode 100755
index 0000000..c42e2b5
--- /dev/null
+++ b/xdelta3/badcopy.c
@@ -0,0 +1,111 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define BUFSZ (1 << 22)  /* bytes requested from stdin per fread () call */
+
+typedef unsigned int usize_t;
+
+double error_prob   = 0.0001;  /* argv[1] (byte_error_prob); sets how far apart modifications fall */
+usize_t mean_change  = 100;    /* argv[2] (mean_error_size); mean size passed to edist () */
+usize_t total_change = 0;      /* sum of all modification sizes so far */
+usize_t total_size   = 0;      /* bytes passed through modify () so far */
+usize_t max_change   = 0;      /* largest single modification so far */
+usize_t num_change   = 0;      /* number of modifications so far */
+
+int last_end = 0;              /* offset in the current buffer just past the latest modification */
+
+static int  /* Random size, exponentially distributed around mean, raised to at least 1 and then capped at max. */
+edist (usize_t mean, usize_t max)
+{
+  double mean_d = mean;
+  double erand  = log (1.0 / drand48 ());  /* +inf (IEEE 754) if drand48 () returns exactly 0.0 */
+  double x_d    = mean_d * erand + 0.5;    /* +0.5 so the truncation below rounds to nearest */
+  /* Clamp while still a double: converting inf, NaN or a value >= 2^32 to usize_t is undefined. */
+  return (x_d >= (double) max) ? max : (x_d >= 1.0 ? (usize_t) x_d : 1);
+}
+
+void modify (char *buf, usize_t size)
+{
+  /* Overwrite randomly spaced runs of buf[0..size) with random bytes; log each COPY/ADD range to stderr. */
+  const usize_t limit = 1U << 31;  /* 1U: shifting a signed 1 into the sign bit is undefined */
+  usize_t bufpos = 0, j;
+
+  last_end = 0;
+  for (;; /* bufpos and j are incremented in the inner loop */)
+    {
+      /* The size of the next modification. */
+      usize_t next_size = edist (mean_change, limit);
+      /* The expected interval of such a change (+inf when error_prob is 0). */
+      double expect_interval = ((double) next_size * (1.0 - error_prob)) / error_prob;
+      /* The number of bytes until the next modification; the mean is clamped before narrowing to usize_t. */
+      usize_t next_mod  = edist (expect_interval < (double) limit ? (usize_t) expect_interval : limit, limit);
+
+      /* Written as subtractions (bufpos <= size always holds) so that huge draws cannot wrap the sum. */
+      if (next_mod > size - bufpos || next_size > size - bufpos - next_mod) { break; }
+      if (max_change < next_size) { max_change = next_size; }
+
+      bufpos += next_mod;
+
+      fprintf (stderr, "COPY: %u-%u (%u)\n", total_size + last_end, total_size + bufpos, bufpos - last_end);
+      fprintf (stderr, "ADD:  %u-%u (%u) is change %u\n", total_size + bufpos , total_size + bufpos + next_size, next_size, num_change);
+
+      total_change += next_size;
+      num_change   += 1;
+
+      for (j = 0; j < next_size; j += 1, bufpos += 1)
+	{
+	  buf[bufpos] = lrand48 () >> 3;
+	}
+
+      last_end = bufpos;
+    }
+
+  fprintf (stderr, "COPY: %u-%u (%u)\n", total_size + last_end, total_size + size, size - last_end);
+
+  total_size += size;
+}
+
+int main(int argc, char **argv)
+{
+  /* Copy stdin to stdout in BUFSZ chunks, passing each through modify (); statistics go to stderr. */
+  static char buf[BUFSZ];  /* static: a 4 MiB automatic array can exceed small default stack limits */
+  int c, ret;
+
+  if (argc > 3)
+    {
+      fprintf (stderr, "usage: badcopy [byte_error_prob [mean_error_size]]\n");
+      return 1;
+    }
+
+  if (argc > 2) { mean_change = atoi (argv[2]); }
+  if (argc > 1) { error_prob  = atof (argv[1]); }
+
+  if (error_prob < 0.0 || error_prob > 1.0)
+    {
+      fprintf (stderr, "warning: error probability out of range\n");
+      return 1;
+    }
+
+  do
+    {
+      c = fread (buf, 1, BUFSZ, stdin);
+      if (c == 0) { break; }
+
+      modify (buf, c);
+      ret = fwrite (buf, 1, c, stdout);
+      if (ret != c) { perror ("fwrite"); return 1; }  /* short write: fail instead of dropping output silently */
+    }
+  while (c == BUFSZ);
+
+  if ((ret = fclose (stdout)))
+    {
+      perror ("fclose");
+      return 1;
+    }
+
+  fprintf (stderr, "add_prob %f; %u adds; total_change %u of %u bytes; add percentage %f; max add size %u\n",
+	   error_prob, num_change, total_change, total_size, (double) total_change / (double) total_size, max_change);
+
+  return 0;
+}
diff --git a/xdelta3/draft-korn-vcdiff.txt b/xdelta3/draft-korn-vcdiff.txt
new file mode 100755
index 0000000..1487deb
--- /dev/null
+++ b/xdelta3/draft-korn-vcdiff.txt
@@ -0,0 +1,1322 @@
+                                                     David G. Korn, AT&T Labs
+				             Joshua P. MacDonald, UC Berkeley
+                                                 Jeffrey C. Mogul, Compaq WRL
+Internet-Draft                                       Kiem-Phong Vo, AT&T Labs
+Expires: 09 November 2002                                    09 November 2001
+
+
+        The VCDIFF Generic Differencing and Compression Data Format
+
+                         draft-korn-vcdiff-06.txt
+
+
+
+Status of this Memo
+
+    This document is an Internet-Draft and is in full conformance
+    with all provisions of Section 10 of RFC2026.
+
+    Internet-Drafts are working documents of the Internet Engineering
+    Task Force (IETF), its areas, and its working groups.  Note that
+    other groups may also distribute working documents as
+    Internet-Drafts.
+
+    Internet-Drafts are draft documents valid for a maximum of six
+    months and may be updated, replaced, or obsoleted by other
+    documents at any time.  It is inappropriate to use Internet-
+    Drafts as reference material or to cite them other than as
+    "work in progress."
+
+    The list of current Internet-Drafts can be accessed at
+    http://www.ietf.org/ietf/1id-abstracts.txt
+
+    The list of Internet-Draft Shadow Directories can be accessed at
+    http://www.ietf.org/shadow.html.
+
+
+Abstract
+
+    This memo describes a general, efficient and portable data format
+    suitable for encoding compressed and/or differencing data so that
+    they can be easily transported among computers.
+
+
+Table of Contents:
+
+    1.  EXECUTIVE SUMMARY ............................................  2
+    2.  CONVENTIONS ..................................................  3
+    3.  DELTA INSTRUCTIONS ...........................................  4
+    4.  DELTA FILE ORGANIZATION ......................................  5
+    5.  DELTA INSTRUCTION ENCODING ...................................  9
+    6.  DECODING A TARGET WINDOW ..................................... 14
+    7.  APPLICATION-DEFINED CODE TABLES .............................. 16
+    8.  PERFORMANCE .................................................. 16
+    9.  FURTHER ISSUES ............................................... 17
+   10.  SUMMARY ...................................................... 18
+   11.  ACKNOWLEDGEMENTS ............................................. 18
+   12.  SECURITY CONSIDERATIONS ...................................... 18
+   13.  SOURCE CODE AVAILABILITY ..................................... 18
+   14.  INTELLECTUAL PROPERTY RIGHTS ................................. 18
+   15.  IANA CONSIDERATIONS .......................................... 19
+   16.  REFERENCES ................................................... 19
+   17.  AUTHOR'S ADDRESS ............................................. 20
+
+
+1.  EXECUTIVE SUMMARY
+
+    Compression and differencing techniques can greatly improve storage
+    and transmission of files and file versions.  Since files are often
+    transported across machines with distinct architectures and performance
+    characteristics, such data should be encoded in a form that is portable
+    and can be decoded with little or no knowledge of the encoders.
+    This document describes Vcdiff, a compact portable encoding format
+    designed for these purposes.
+
+    Data differencing is the process of computing a compact and invertible
+    encoding of a "target file" given a "source file".  Data compression
+    is similar but without the use of source data.  The UNIX utilities diff,
+    compress, and gzip are well-known examples of data differencing and
+    compression tools.  For data differencing, the computed encoding is
+    called a "delta file", and, for data compression, it is called
+    a "compressed file".  Delta and compressed files are good for storage
+    and transmission as they are often smaller than the originals.
+
+    Data differencing and data compression are traditionally treated
+    as distinct types of data processing.  However, as shown in the Vdelta
+    technique by Korn and Vo [1], compression can be thought of as a special
+    case of differencing in which the source data is empty. The basic idea
+    is to unify the string parsing scheme used in the Lempel-Ziv'77 style
+    compressors [2], and the block-move technique of Tichy [3].  Loosely
+    speaking, this works as follows:
+
+        a. Concatenate source and target data.
+        b. Parse the data from left to right as in LZ'77 but
+	   make sure that a parsed segment starts the target data.
+        c. Start to output when reaching target data.
+
+    Parsing is based on string matching algorithms such as suffix trees [4]
+    or hashing with different time and space performance characteristics.
+    Vdelta uses a fast string matching algorithm that requires less memory
+    than other techniques [5,6].  However, even with this algorithm, the
+    memory requirement can still be prohibitive for large files.  A common
+    way to deal with memory limitation is to partition an input file into
+    chunks called "windows" and process them separately. Here, except for
+    unpublished work by Vo, little has been done on designing effective
+    windowing schemes. Current techniques, including Vdelta, simply use
+    source and target windows with corresponding addresses across source
+    and target files.
+
+    String matching and windowing algorithms have large influence on the
+    compression rate of delta and compressed files. However, it is desirable
+    to have a portable encoding format that is independent of such algorithms.
+    This enables construction of client-server applications in which a server
+    may serve clients with unknown computing characteristics.  Unfortunately,
+    all current differencing and compressing tools, including Vdelta, fall
+    short in this respect. Their storage formats are closely intertwined
+    with the implemented string matching and/or windowing algorithms.
+
+    The encoding format Vcdiff proposed here addresses the above issues.
+    Vcdiff achieves the below characteristics:
+
+	Output compactness:
+            The basic encoding format compactly represents compressed or delta
+	    files. Applications can further extend the basic encoding format
+	    with "secondary encoders" to achieve more compression.
+
+	Data portability:
+	    The basic encoding format is free from machine byte order and
+	    word size issues. This allows data to be encoded on one machine
+	    and decoded on a different machine with different architecture.
+
+    	Algorithm genericity:
+	    The decoding algorithm is independent from string matching and
+	    windowing algorithms. This allows competition among implementations
+	    of the encoder while keeping the same decoder.
+
+    	Decoding efficiency:
+	    Except for secondary encoder issues, the decoding algorithm runs
+	    in time proportional to the size of the target file and uses space
+	    proportional to the maximal window size.  Vcdiff differs from more
+	    conventional compressors in that it uses only byte-aligned
+	    data, thus avoiding bit-level operations, which improves
+	    decoding speed at the slight cost of compression efficiency.
+
+    The Vcdiff data format and the algorithms for decoding data shall be
+    described next.  Since Vcdiff treats compression as a special case of
+    differencing, we shall use the term "delta file" to indicate the
+    compressed output for both cases.
+
+
+2. CONVENTIONS
+
+    The basic data unit is a byte.  For portability, Vcdiff shall limit
+    a byte to its lower eight bits even on machines with larger bytes.
+    The bits in a byte are ordered from right to left so that the least
+    significant bit (LSB) has value 1, and the most significant bit (MSB),
+    has value 128.
+
+    For purposes of exposition in this document, we adopt the convention
+    that the LSB is numbered 0, and the MSB is numbered 7.  Bit numbers
+    never appear in the encoded format itself.
+
+    Vcdiff encodes unsigned integer values using a portable variable-sized
+    format (originally introduced in the Sfio library [7]). This encoding
+    treats an integer as a number in base 128. Then, each digit in this
+    representation is encoded in the lower seven bits of a byte. Except for
+    the least significant byte, other bytes have their most significant bit
+    turned on to indicate that there are still more digits in the encoding.
+    The two key properties of this integer encoding that are beneficial
+    to a data compression format are:
+
+	a. The encoding is portable among systems using 8-bit bytes, and
+        b. Small values are encoded compactly.
+
+    For example, consider the value 123456789 which can be represented with
+    four 7-bit digits whose values are 58, 111, 26, 21 in order from most
+    to least significant. Below is the 8-bit byte encoding of these digits.
+    Note that the MSBs of 58, 111 and 26 are on.
+
+                 +-------------------------------------------+
+                 | 10111010 | 11101111 | 10011010 | 00010101 |
+                 +-------------------------------------------+
+                   MSB+58     MSB+111    MSB+26     0+21
+
+
+    Henceforth, the terms "byte" and "integer" will refer to a byte and an
+    unsigned integer as described.
+
+
+    From time to time, algorithms are exhibited to clarify the descriptions
+    of parts of the Vcdiff format. On such occasions, the C language will be
+    used to make precise the algorithms.  The C code shown in this
+    document is meant for clarification only, and is not part of the
+    actual specification of the Vcdiff format.
+
+    In this specification, the key words "MUST", "MUST NOT",
+    "SHOULD", "SHOULD NOT", and "MAY" are to be interpreted as
+    described in RFC2119 [12].
+
+
+3.  DELTA INSTRUCTIONS
+
+    A large target file is partitioned into non-overlapping sections
+    called "target windows". These target windows are processed separately
+    and sequentially based on their order in the target file.
+
+    A target window T of length t may be compared against some source data
+    segment S of length s. By construction, this source data segment S
+    comes either from the source file, if one is used, or from a part of
+    the target file earlier than T.  In this way, during decoding, S is
+    completely known when T is being decoded.
+
+    The choices of T, t, S and s are made by some window selection algorithm
+    which can greatly affect the size of the encoding. However, as seen later,
+    these choices are encoded so that no knowledge of the window selection
+    algorithm is needed during decoding.
+
+    Assume that S[j] represents the jth byte in S, and T[k] represents
+    the kth byte in T.  Then, for the delta instructions, we treat the data
+    windows S and T as substrings of a superstring U formed by concatenating
+    them like this:
+
+        S[0]S[1]...S[s-1]T[0]T[1]...T[t-1]
+
+    The "address" of a byte in S or T is referred to by its location in U.
+    For example, the address of T[k] is s+k.
+
+    The instructions to encode and direct the reconstruction of a target
+    window are called delta instructions. There are three types:
+
+	ADD: This instruction has two arguments, a size x and a sequence of
+	    x bytes to be copied.
+	COPY: This instruction has two arguments, a size x and an address p
+	    in the string U. The arguments specify the substring of U that
+	    must be copied. We shall assert that such a substring must be
+	    entirely contained in either S or T.
+	RUN: This instruction has two arguments, a size x and a byte b that
+	    will be repeated x times.
+
+    Below are example source and target windows and the delta instructions
+    that encode the target window in terms of the source window.
+
+        a b c d e f g h i j k l m n o p
+        a b c d w x y z e f g h e f g h e f g h e f g h z z z z
+
+        COPY  4, 0
+        ADD   4, w x y z
+        COPY  4, 4
+        COPY 12, 24
+	RUN   4, z
+
+
+    Thus, the first letter 'a' in the target window is at location 16
+    in the superstring. Note that the fourth instruction, "COPY 12, 24",
+    copies data from T itself since address 24 is position 8 in T.
+    This instruction also shows that it is fine to overlap the data to be
+    copied with the data being copied from as long as the latter starts
+    earlier. This enables efficient encoding of periodic sequences,
+    i.e., sequences with regularly repeated subsequences. The RUN instruction
+    is a compact way to encode a sequence repeating the same byte even though
+    such a sequence can be thought of as a periodic sequence with period 1.
+
+    To reconstruct the target window, one simply processes one delta
+    instruction at a time and copies the data either from the source window
+    or the target window being reconstructed, based on the type of the
+    instruction and the associated address, if any.
+
+
+4.  DELTA FILE ORGANIZATION
+
+    A Vcdiff delta file starts with a Header section followed by a sequence
+    of Window sections. The Header section includes magic bytes to identify
+    the file type, and information concerning data processing beyond the
+    basic encoding format. The Window sections encode the target windows.
+
+    Below is the overall organization of a delta file. The indented items
+    refine the ones immediately above them. An item in square brackets may
+    or may not be present in the file depending on the information encoded
+    in the Indicator byte above it.
+
+        Header
+	    Header1                                  - byte
+	    Header2                                  - byte
+	    Header3                                  - byte
+	    Header4                                  - byte
+	    Hdr_Indicator                            - byte
+	    [Secondary compressor ID]                - byte
+
+[@@@ Why is compressor ID not an integer? ]
+[@@@ If we aren't defining any secondary compressors yet, then it seems
+that defining the [Secondary compressor ID] and the corresponding
+VCD_DECOMPRESS Hdr_Indicator bit in this draft has no real value.  An
+implementation of this specification won't be able to decode a VCDIFF
+encoded with this option if it doesn't know about any secondary
+compressors.  It seems that you should specify the bits related to
+secondary compressors once you have defined the first secondary
+compressor.  I can imagine a secondary-compressor might want to supply
+extra information, such as a dictionary of some kind, in which case
+this speculative treatment wouldn't go far enough.]
+
+	    [Length of code table data]              - integer
+	    [Code table data]
+	      	Size of near cache                   - byte
+	        Size of same cache                   - byte
+	        Compressed code table data
+	Window1
+	    Win_Indicator                            - byte
+	    [Source segment size]                    - integer
+	    [Source segment position]                - integer
+            The delta encoding of the target window
+	        Length of the delta encoding         - integer
+	        The delta encoding
+	            Size of the target window        - integer
+	            Delta_Indicator                  - byte
+	            Length of data for ADDs and RUNs - integer
+	            Length of instructions and sizes - integer
+	            Length of addresses for COPYs    - integer
+	            Data section for ADDs and RUNs   - array of bytes
+	            Instructions and sizes section   - array of bytes
+	            Addresses section for COPYs      - array of bytes
+	Window2
+	...
+
+
+
+4.1 The Header Section
+
+    Each delta file starts with a header section organized as below.
+    Note the convention that square-brackets enclose optional items.
+
+	    Header1                                  - byte = 0xD6
+	    Header2                                  - byte = 0xC3
+	    Header3                                  - byte = 0xC4
+
+[@@@ HMMM: this draft gave 0xE6, 0xD3, 0xD4 here, but 'V', 'C' and 'D' are
+0x56, 0x43 and 0x44, so with the most significant bit turned on they are:
+0xD6
+0xC3
+0xC4 ]
+
+	    Header4                                  - byte
+	    Hdr_Indicator                            - byte
+	    [Secondary compressor ID]                - byte
+	    [Length of code table data]              - integer
+	    [Code table data]
+
+    The first three Header bytes are the ASCII characters 'V', 'C' and 'D'
+    with their most significant bits turned on (in hexadecimal, the values
+    are 0xD6, 0xC3, and 0xC4). The fourth Header byte is currently set to
+    zero. In the future, it might be used to indicate the version of Vcdiff.
+
+    The Hdr_Indicator byte shows if there are any initialization data
+    required to aid in the reconstruction of data in the Window sections.
+    This byte MAY have non-zero values for either, both, or neither of
+    the two bits VCD_DECOMPRESS and VCD_CODETABLE below:
+
+	    7 6 5 4 3 2 1 0
+	   +-+-+-+-+-+-+-+-+
+	   | | | | | | | | |
+	   +-+-+-+-+-+-+-+-+
+	                ^ ^
+	                | |
+	                | +-- VCD_DECOMPRESS
+	                +---- VCD_CODETABLE
+
+    If bit 0 (VCD_DECOMPRESS) is non-zero, this indicates that a secondary
+    compressor may have been used to further compress certain parts of the
+    delta encoding data as described in Sections 4.3 and 6. In that case,
+    the ID of the secondary compressor is given next. If this bit is zero,
+    the compressor ID byte is not included.
+
+[@@@ If we aren't defining any secondary compressors yet, then it seems
+this bit has no real value yet..]
+
+    If bit 1 (VCD_CODETABLE) is non-zero, this indicates that an
+    application-defined code table is to be used for decoding the delta
+    instructions. This table itself is compressed.  The length of the data
+    comprising this compressed code table and the data follow next. Section 7
+    discusses application-defined code tables.  If this bit is zero, the code
+    table data length and the code table data are not included.
+
+    If both bits are set, then the compressor ID byte is included
+    before the code table data length and the code table data.
+
+
+4.2 The Format of a Window Section
+
+    Each Window section is organized as follows:
+
+	    Win_Indicator                            - byte
+	    [Source segment length]                  - integer
+	    [Source segment position]                - integer
+            The delta encoding of the target window
+
+
+    Below are the details of the various items:
+
+[@@@ Here, I want to replace the Win_Indicator with a source-count,
+followed by source-count length/position pairs?]
+
+        Win_Indicator:
+	    This byte is a set of bits, as shown:
+
+	    7 6 5 4 3 2 1 0
+	   +-+-+-+-+-+-+-+-+
+	   | | | | | | | | |
+	   +-+-+-+-+-+-+-+-+
+	                ^ ^
+	                | |
+	                | +-- VCD_SOURCE
+	                +---- VCD_TARGET
+
+
+	    If bit 0 (VCD_SOURCE) is non-zero, this indicates that a segment
+            of data from the "source" file was used as the corresponding
+            source window of data to encode the target window. The decoder
+	    will use this same source data segment to decode the target window.
+
+	    If bit 1 (VCD_TARGET) is non-zero, this indicates that a segment
+            of data from the "target" file was used as the corresponding
+	    source window of data to encode the target window. As above, this
+	    same source data segment is used to decode the target window.
+
+	    The Win_Indicator byte MUST NOT have more than one of the bits
+	    set (non-zero).  It MAY have none of these bits set.
+
+	    If one of these bits is set, the byte is followed by two
+            integers to indicate respectively the length and position of
+            the source data segment in the relevant file.  If the
+            indicator byte is zero, the target window was compressed
+            by itself without comparing against another data segment,
+            and these two integers are not included.
+
+        The delta encoding of the target window:
+            This contains the delta encoding of the target window either
+            in terms of the source data segment (i.e., VCD_SOURCE
+            or VCD_TARGET was set) or by itself if no source window
+            is specified. This data format is discussed next.
+
+
+4.3 The Delta Encoding of a Target Window
+
+    The delta encoding of a target window is organized as follows:
+
+	Length of the delta encoding            - integer
+	The delta encoding
+	    Length of the target window         - integer
+	    Delta_Indicator                     - byte
+	    Length of data for ADDs and RUNs    - integer
+	    Length of instructions section      - integer
+	    Length of addresses for COPYs       - integer
+	    Data section for ADDs and RUNs      - array of bytes
+	    Instructions and sizes section      - array of bytes
+	    Addresses section for COPYs         - array of bytes
+
+
+	Length of the delta encoding:
+	    This integer gives the total number of remaining bytes that
+	    comprise data of the delta encoding for this target window.
+
+        The delta encoding:
+	    This contains the data representing the delta encoding which
+	    is described next.
+
+    	Length of the target window:
+	    This integer indicates the actual size of the target window
+            after decompression. A decoder can use this value to allocate
+            memory to store the uncompressed data.
+
+	Delta_Indicator:
+	    This byte is a set of bits, as shown:
+
+	    7 6 5 4 3 2 1 0
+	   +-+-+-+-+-+-+-+-+
+	   | | | | | | | | |
+	   +-+-+-+-+-+-+-+-+
+	              ^ ^ ^
+	              | | |
+	              | | +-- VCD_DATACOMP
+	              | +---- VCD_INSTCOMP
+	              +------ VCD_ADDRCOMP
+
+		VCD_DATACOMP:	bit value 1.
+		VCD_INSTCOMP:	bit value 2.
+		VCD_ADDRCOMP:	bit value 4.
+
+            As discussed, the delta encoding consists of COPY, ADD and RUN
+            instructions. The ADD and RUN instructions have accompanying
+            unmatched data (that is, data that does not specifically match
+            any data in the source window or in some earlier part of the
+            target window) and the COPY instructions have addresses of where
+	    the matches occur. OPTIONALLY, these types of data MAY be further
+	    compressed using a secondary compressor. Thus, Vcdiff separates
+            the encoding of the delta instructions into three parts:
+
+	        a. The unmatched data in the ADD and RUN instructions,
+	        b. The delta instructions and accompanying sizes, and
+                c. The addresses of the COPY instructions.
+
+            If the bit VCD_DECOMPRESS (Section 4.1) was on, each of these
+            sections may have been compressed using the specified secondary
+            compressor. The bit positions 0 (VCD_DATACOMP), 1 (VCD_INSTCOMP),
+            and 2 (VCD_ADDRCOMP) respectively indicate, if non-zero, that
+            the corresponding parts are compressed. Then, these parts MUST
+	    be decompressed before decoding the delta instructions.
+
+	Length of data for ADDs and RUNs:
+	    This is the length (in bytes) of the section of data storing
+            the unmatched data accompanying the ADD and RUN instructions.
+
+	Length of instructions section:
+	    This is the length (in bytes) of the delta instructions and
+            accompanying sizes.
+
+	Length of addresses for COPYs:
+	    This is the length (in bytes) of the section storing
+            the addresses of the COPY instructions.
+
+    	Data section for ADDs and RUNs:
+	    This sequence of bytes encodes the unmatched data for the ADD
+            and RUN instructions.
+
+	Instructions and sizes section:
+	    This sequence of bytes encodes the instructions and their sizes.
+
+	Addresses section for COPYs:
+	    This sequence of bytes encodes the addresses of the COPY
+	    instructions.
+
+
+5. DELTA INSTRUCTION ENCODING
+
+    The delta instructions described in Section 3 represent the results of
+    string matching. For many data differencing applications in which the
+    changes between source and target data are small, any straightforward
+    representation of these instructions would be adequate.  However, for
+    applications including data compression, it is important to encode
+    these instructions well to achieve good compression rates.  From our
+    experience, the following observations can be made:
+
+    a. The addresses in COPY instructions are locations of matches and
+       often occur close by or even exactly equal to one another. This is
+       because data in local regions are often replicated with minor changes.
+       In turn, this means that coding a newly matched address against some
+       set of recently matched addresses can be beneficial.
+
+    b. The matches are often short in length and separated by small amounts
+       of unmatched data. That is, the lengths of COPY and ADD instructions
+       are often small. This is particularly true of binary data such as
+       executable files or structured data such as HTML or XML. In such cases,
+       compression can be improved by combining the encoding of the sizes
+       and the instruction types as well as combining the encoding of adjacent
+       delta instructions with sufficiently small data sizes.
+
+    The below subsections discuss how the Vcdiff data format provides
+    mechanisms enabling encoders to use the above observations to improve
+    compression rates.
+
+
+5.1 Address Encoding Modes of COPY Instructions
+
+    As mentioned earlier, addresses of COPY instructions often occur close
+    to one another or are exactly equal. To take advantage of this phenomenon
+    and encode addresses of COPY instructions more efficiently, the Vcdiff
+    data format supports the use of two different types of address caches.
+    Both the encoder and decoder maintain these caches, so that the decoder's
+    caches remain synchronized with the encoder's caches.
+
+    a. A "near" cache is an array with "s_near" slots, each containing an
+       address used for encoding addresses nearby to previously encoded
+       addresses (in the positive direction only).  The near cache also
+       maintains a "next_slot" index to the near cache.  New entries to the
+       near cache are always inserted in the next_slot index, which maintains
+       a circular buffer of the s_near most recent addresses.
+
+    b. A "same" cache is an array with s_same*256 slots (a multiple of 256),
+       each containing an address.  The same cache maintains a hash table of
+       recent addresses used for repeated encoding of the exact same address.
+
+
+    By default, the parameters s_near and s_same are respectively set to 4
+    and 3. An encoder MAY modify these values, but then it MUST encode the
+    new values in the encoding itself, as discussed in Section 7, so that
+    the decoder can properly set up its own caches.
+
+    At the start of processing a target window, an implementation
+    (encoder or decoder) initializes all of the slots in both caches
+    to zero.  The next_slot pointer of the near cache is set
+    to point to slot zero.
+
+    Each time a COPY instruction is processed by the encoder or
+    decoder, the implementation's caches are updated as follows, where
+    "addr" is the address in the COPY instruction.
+
+    a. The slot in the near cache referenced by the next_slot
+       index is set to addr.  The next_slot index is then incremented
+       modulo s_near.
+
+    b. The slot in the same cache whose index is addr%(s_same*256)
+       is set to addr. [We use the C notations of % for modulo and
+       * for multiplication.]
+
+
+5.2 Example code for maintaining caches
+
+    To make clear the above description, below are example cache data
+    structures and algorithms to initialize and update them:
+
+        typedef struct _cache_s
+        {
+	    int*  near;      /* array of size s_near        */
+            int   s_near;
+            int   next_slot; /* the circular index for near */
+            int*  same;      /* array of size s_same*256    */
+            int   s_same;
+        } Cache_t;
+
+        cache_init(Cache_t* ka)
+        {
+	    int   i;
+
+            ka->next_slot = 0;
+            for(i = 0; i < ka->s_near; ++i)
+                 ka->near[i] = 0;
+
+            for(i = 0; i < ka->s_same*256; ++i)
+                 ka->same[i] = 0;
+        }
+
+        cache_update(Cache_t* ka, int addr)
+        {
+	    if(ka->s_near > 0)
+            {   ka->near[ka->next_slot] = addr;
+                ka->next_slot = (ka->next_slot + 1) % ka->s_near;
+            }
+
+            if(ka->s_same > 0)
+                ka->same[addr % (ka->s_same*256)] = addr;
+        }
+
+
+5.3 Encoding of COPY instruction addresses
+
+    The address of a COPY instruction is encoded using different modes
+    depending on the type of cached address used, if any.
+
+    Let "addr" be the address of a COPY instruction to be decoded and "here"
+    be the current location in the target data (i.e., the start of the data
+    about to be encoded or decoded).  Let near[j] be the jth element in
+    the near cache, and same[k] be the kth element in the same cache.
+    Below are the possible address modes:
+
+	VCD_SELF: This mode has value 0. The address was encoded by itself
+            as an integer.
+
+	VCD_HERE: This mode has value 1. The address was encoded as
+	    the integer value "here - addr".
+
+	Near modes: The "near modes" are in the range [2,s_near+1]. Let m
+	    be the mode of the address encoding. The address was encoded
+	    as the integer value "addr - near[m-2]".
+
+	Same modes: The "same modes" are in the range
+	    [s_near+2,s_near+s_same+1]. Let m be the mode of the encoding.
+	    The address was encoded as a single byte b such that
+	    "addr == same[(m - (s_near+2))*256 + b]".
+
+
+5.3 Example code for encoding and decoding of COPY instruction addresses
+
+    We show example algorithms below to demonstrate use of address modes more
+    clearly. The encoder has freedom to choose address modes; the sample
+    addr_encode() algorithm merely shows one way of picking the address
+    mode. The decoding algorithm addr_decode() will uniquely decode addresses
+    regardless of the encoder's algorithm choice.
+
+    Note that the address caches are updated immediately after an address is
+    encoded or decoded. In this way, the decoder is always synchronized with
+    the encoder.
+
+        int addr_encode(Cache_t* ka, int addr, int here, int* mode)
+        {
+	    int  i, d, bestd, bestm;
+
+	    /* Attempt to find the address mode that yields the
+	     * smallest integer value for "d", the encoded address
+	     * value, thereby minimizing the encoded size of the
+	     * address. */
+
+            bestd = addr; bestm = VCD_SELF;      /* VCD_SELF == 0 */
+
+            if((d = here-addr) < bestd)
+                { bestd = d; bestm = VCD_HERE; } /* VCD_HERE == 1 */
+
+            for(i = 0; i < ka->s_near; ++i)
+                if((d = addr - ka->near[i]) >= 0 && d < bestd)
+                    { bestd = d; bestm = i+2; }
+
+            if(ka->s_same > 0 && ka->same[d = addr%(ka->s_same*256)] == addr)
+                { bestd = d%256; bestm = ka->s_near + 2 + d/256; }
+
+            cache_update(ka,addr);
+
+            *mode = bestm; /* this returns the address encoding mode */
+            return  bestd; /* this returns the encoded address       */
+        }
+
+    Note that the addr_encode() algorithm chooses the best address mode using a
+    local optimization, but that may not lead to the best encoding efficiency
+    because different modes lead to different instruction encodings, as described below.
+
+    The functions addrint() and addrbyte() used in addr_decode() obtain from
+    the "Addresses section for COPYs" (Section 4.3) an integer or a byte,
+    respectively. These utilities will not be described here.  We simply
+    recall that an integer is represented as a compact variable-sized string
+    of bytes as described in Section 2 (i.e., base 128).
+
+        int addr_decode(Cache_t* ka, int here, int mode)
+        {   int  addr, m;
+
+            if(mode == VCD_SELF)
+                 addr = addrint();
+            else if(mode == VCD_HERE)
+                 addr = here - addrint();
+            else if((m = mode - 2) >= 0 && m < ka->s_near) /* near cache */
+                 addr = ka->near[m] + addrint();
+            else /* same cache */
+            {    m = mode - (2 + ka->s_near);
+                 addr = ka->same[m*256 + addrbyte()];
+            }
+
+            cache_update(ka, addr);
+
+            return addr;
+        }
+
+
+5.4 Instruction Codes
+
+    As noted, the data sizes associated with delta instructions are often
+    small. Thus, compression efficiency can be improved by combining the sizes
+    and instruction types in a single encoding, as well as by combining certain
+    pairs of adjacent delta instructions. Effective choices of when to perform
+    such combinations depend on many factors including the data being processed
+    and the string matching algorithm in use. For example, if many COPY
+    instructions have the same data sizes, it may be worthwhile to encode these
+    instructions more compactly than others.
+
+    The Vcdiff data format is designed so that a decoder does not need to be
+    aware of the choices made in encoding algorithms. This is achieved with the
+    notion of an "instruction code table" containing 256 entries. Each entry
+    defines either a single delta instruction or a pair of instructions that
+    have been combined.  Note that the code table itself only exists in main
+    memory, not in the delta file (unless using an application-defined code
+    table, described in Section 7). The encoded data simply includes the index
+    of each instruction and, since there are only 256 indices, each index
+    can be represented as a single byte.
+
+    Each instruction code entry contains six fields, each of which
+    is a single byte with unsigned value:
+
+            +-----------------------------------------------+
+	    | inst1 | size1 | mode1 | inst2 | size2 | mode2 |
+	    +-----------------------------------------------+
+
+@@@ could be more compact
+
+    Each triple (inst,size,mode) defines a delta instruction. The meanings
+    of these fields are as follows:
+
+    inst: An "inst" field can have one of the four values: NOOP (0), ADD (1),
+	RUN (2) or COPY (3) to indicate the instruction types. NOOP means
+	that no instruction is specified. In this case, both the corresponding
+	size and mode fields will be zero.
+
+    size: A "size" field is zero or positive. A value zero means that the
+	size associated with the instruction is encoded separately as
+	an integer in the "Instructions and sizes section" (Section 6).
+	A positive value for "size" defines the actual data size.
+	Note that since the size is restricted to a byte, the maximum
+	value for any instruction with size implicitly defined in the code
+	table is 255.
+
+    mode: A "mode" field is significant only when the associated delta
+	instruction is a COPY. It defines the mode used to encode the
+	associated addresses. For other instructions, this is always zero.
+
+
+5.5 The Code Table
+
+    Following the discussions on address modes and instruction code tables,
+    we define a "Code Table" to have the data below:
+
+	s_near: the size of the near cache,
+	s_same: the size of the same cache,
+	i_code: the 256-entry instruction code table.
+
+    Vcdiff itself defines a "default code table" in which s_near is 4
+    and s_same is 3. Thus, there are 9 address modes for a COPY instruction.
+    The first two are VCD_SELF (0) and VCD_HERE (1). Modes 2, 3, 4 and 5
+    are for addresses coded against the near cache. And, modes 6, 7  and 8
+    are for addresses coded against the same cache.
+
+    The default instruction code table is depicted below, in a compact
+    representation that we use only for descriptive purposes.  See section 7
+    for the specification of how an instruction code table is represented
+    in the Vcdiff encoding format.  In the depiction, a zero value for
+    size indicates that the size is separately coded. The mode of non-COPY
+    instructions is represented as 0 even though they are not used.
+
+
+         TYPE      SIZE     MODE    TYPE     SIZE     MODE     INDEX
+        ---------------------------------------------------------------
+     1.  RUN         0        0     NOOP       0        0        0
+     2.  ADD    0, [1,17]     0     NOOP       0        0      [1,18]
+     3.  COPY   0, [4,18]     0     NOOP       0        0     [19,34]
+     4.  COPY   0, [4,18]     1     NOOP       0        0     [35,50]
+     5.  COPY   0, [4,18]     2     NOOP       0        0     [51,66]
+     6.  COPY   0, [4,18]     3     NOOP       0        0     [67,82]
+     7.  COPY   0, [4,18]     4     NOOP       0        0     [83,98]
+     8.  COPY   0, [4,18]     5     NOOP       0        0     [99,114]
+     9.  COPY   0, [4,18]     6     NOOP       0        0    [115,130]
+    10.  COPY   0, [4,18]     7     NOOP       0        0    [131,146]
+    11.  COPY   0, [4,18]     8     NOOP       0        0    [147,162]
+    12.  ADD       [1,4]      0     COPY     [4,6]      0    [163,174]
+    13.  ADD       [1,4]      0     COPY     [4,6]      1    [175,186]
+    14.  ADD       [1,4]      0     COPY     [4,6]      2    [187,198]
+    15.  ADD       [1,4]      0     COPY     [4,6]      3    [199,210]
+    16.  ADD       [1,4]      0     COPY     [4,6]      4    [211,222]
+    17.  ADD       [1,4]      0     COPY     [4,6]      5    [223,234]
+    18.  ADD       [1,4]      0     COPY       4        6    [235,238]
+    19.  ADD       [1,4]      0     COPY       4        7    [239,242]
+    20.  ADD       [1,4]      0     COPY       4        8    [243,246]
+    21.  COPY        4      [0,8]   ADD        1        0    [247,255]
+        ---------------------------------------------------------------
+
+    In the above depiction, each numbered line represents one or more
+    entries in the actual instruction code table (recall that an entry in
+    the instruction code table may represent up to two combined delta
+    instructions.) The last column ("INDEX") shows which index value or
+    range of index values of the entries covered by that line. The notation
+    [i,j] means values from i through j, inclusive. The first 6 columns of
+    a line in the depiction describe the pairs of instructions used for
+    the corresponding index value(s).
+
+    If a line in the depiction includes a column entry using the [i,j]
+    notation, this means that the line is instantiated for each value
+    in the range from i to j, inclusive.  The notation "0, [i,j]" means
+    that the line is instantiated for the value 0 and for each value
+    in the range from i to j, inclusive.
+
+    If a line in the depiction includes more than one entry using the [i,j]
+    notation, implying a "nested loop" to convert the line to a range of
+    table entries, the first such [i,j] range specifies the outer loop,
+    and the second specifies the inner loop.
+
+    The below examples should make clear the above description:
+
+    Line 1 shows the single RUN instruction with index 0. As the size field
+    is 0, this RUN instruction always has its actual size encoded separately.
+
+    Line 2 shows the 18 single ADD instructions. The ADD instruction with
+    size field 0 (i.e., the actual size is coded separately) has index 1.
+    ADD instructions with sizes from 1 to 17 use code indices 2 to 18 and
+    their sizes are as given (so they will not be separately encoded.)
+
+    Following the single ADD instructions are the single COPY instructions
+    ordered by their address encoding modes. For example, line 11 shows the
+    COPY instructions with mode 8, i.e., the last of the same cache.
+    In this case, the COPY instruction with size field 0 has index 147.
+    Again, the actual size of this instruction will be coded separately.
+
+    Lines 12 to 21 show the pairs of instructions that are combined together.
+    For example, line 12 depicts the 12 entries in which an ADD instruction
+    is combined with an immediately following COPY instruction. The entries
+    with indices 163, 164, 165 represent the pairs in which the ADD
+    instructions all have size 1 while the COPY instructions have mode
+    0 (VCD_SELF) and sizes 4, 5 and 6 respectively.
+
+    The last line, line 21, shows the nine instruction pairs where the first
+    instruction is a COPY and the second is an ADD. In this case, all COPY
+    instructions have size 4 with mode ranging from 0 to 8 and all the ADD
+    instructions have size 1. Thus, the entry with largest index 255
+    combines a COPY instruction of size 4 and mode 8 with an ADD instruction
+    of size 1.
+
+    The choice of the minimum size 4 for COPY instructions in the default code
+    table was made from experiments that showed that excluding small matches
+    (less than 4 bytes long) improved the compression rates.
+
+
+6. DECODING A TARGET WINDOW
+
+    Section 4.3 discusses that the delta instructions and associated data
+    are encoded in three arrays of bytes:
+
+        Data section for ADDs and RUNs,
+        Instructions and sizes section, and
+        Addresses section for COPYs.
+
+
+    Further, these data sections may have been further compressed by some
+    secondary compressor. Assume that any such compressed data has been
+    decompressed so that we now have three arrays:
+
+	inst: bytes coding the instructions and sizes.
+        data: unmatched data associated with ADDs and RUNs.
+	addr: bytes coding the addresses of COPYs.
+
+    These arrays are organized as follows:
+
+	inst:
+	    a sequence of (index, [size1], [size2]) tuples, where "index"
+            is an index into the instruction code table, and size1 and size2
+            are integers that MAY or MAY NOT be included in the tuple as
+            follows. The entry with the given "index" in the instruction
+            code table potentially defines two delta instructions. If the
+            first delta instruction is not a VCD_NOOP and its size is zero,
+            then size1 MUST be present. Otherwise, size1 MUST be omitted and
+            the size of the instruction (if it is not VCD_NOOP) is as defined
+            in the table. The presence or absence of size2 is defined
+            similarly with respect to the second delta instruction.
+
+	data:
+	    a sequence of data values, encoded as bytes.
+
+	addr:
+	    a sequence of address values. Addresses are normally encoded as
+            integers as described in Section 2 (i.e., base 128).
+	    Since the same cache emits addresses in the range [0,255],
+	    however, same cache addresses are always encoded as a
+	    single byte.
+
+    To summarize, each tuple in the "inst" array includes an index to some
+    entry in the instruction code table that determines:
+
+    a. Whether one or two instructions were encoded and their types.
+
+    b. If the instructions have their sizes encoded separately, these
+       sizes will follow, in order, in the tuple.
+
+    c. If the instructions have accompanying data, i.e., ADDs or RUNs,
+       their data will be in the array "data".
+
+    d. Similarly, if the instructions are COPYs, the coded addresses are
+       found in the array "addr".
+
+    The decoding procedure simply processes the arrays by reading one code
+    index at a time, looking up the corresponding instruction code entry,
+    then consuming the respective sizes, data and addresses following the
+    directions in this entry. In other words, the decoder maintains an implicit
+    next-element pointer for each array; "consuming" an instruction tuple,
+    data, or address value implies incrementing the associated pointer.
+
+    For example, if during the processing of the target window, the next
+    unconsumed tuple in the inst array has index value 19, then the first
+    instruction is a COPY, whose size is found as the immediately following
+    integer in the inst array.  Since the mode of this COPY instruction is
+    VCD_SELF, the corresponding address is found by consuming the next
+    integer in the addr array.  The data array is left intact. As the second
+    instruction for code index 19 is a NOOP, this tuple is finished.
+
+
+7. APPLICATION-DEFINED CODE TABLES
+
+    Although the default code table used in Vcdiff is good for general
+    purpose encoders, there are times when other code tables may perform
+    better. For example, to code a file with many identical segments of data,
+    it may be advantageous to have a COPY instruction with the specific size
+    of these data segments so that the instruction can be encoded in a single
+    byte. Such a special code table MUST then be encoded in the delta file
+    so that the decoder can reconstruct it before decoding the data.
+
+    Vcdiff allows an application-defined code table to be specified
+    in a delta file with the following data:
+
+	Size of near cache            - byte
+	Size of same cache            - byte
+	Compressed code table data
+
+    The "compressed code table data" encodes the delta between the default
+    code table (source) and the new code table (target) in the same manner as
+    described in Section 4.3 for encoding a target window in terms of a
+    source window. This delta is computed using the following steps:
+
+    a.  Convert the new instruction code table into a string, "code", of
+	1536 bytes using the below steps in order:
+
+        i. Add in order the 256 bytes representing the types of the first
+	   instructions in the instruction pairs.
+       ii. Add in order the 256 bytes representing the types of the second
+	   instructions in the instruction pairs.
+      iii. Add in order the 256 bytes representing the sizes of the first
+	   instructions in the instruction pairs.
+       iv. Add in order the 256 bytes representing the sizes of the second
+	   instructions in the instruction pairs.
+        v. Add in order the 256 bytes representing the modes of the first
+	   instructions in the instruction pairs.
+       vi. Add in order the 256 bytes representing the modes of the second
+	   instructions in the instruction pairs.
+
+    b.  Similarly, convert the default instruction code table into
+	a string "dflt".
+
+    c.  Treat the string "code" as a target window and "dflt" as the
+	corresponding source data and apply an encoding algorithm to
+	compute the delta encoding of "code" in terms of "dflt".
+	This computation MUST use the default code table for encoding
+	the delta instructions.
+
+    The decoder can then reverse the above steps to decode the compressed
+    table data using the method of Section 6, employing the default code
+    table, to generate the new code table.  Note that the decoder does not
+    need to know anything about the details of the encoding algorithm used
+    in step (c). The decoder is still able to decode the new code table
+    because the Vcdiff format is independent from the choice of encoding
+    algorithm, and because the encoder in step (c) uses the known, default
+    code table.
+
+
+8. PERFORMANCE
+
+    The encoding format is compact. For compression only, using the LZ-77
+    string parsing strategy and without any secondary compressors, the typical
+    compression rate is better than Unix compress and close to gzip.  For
+    differencing, the data format is better than all known methods in
+    terms of its stated goal, which is primarily decoding speed and
+    encoding efficiency.
+
+    We compare the performance of compress, gzip and Vcdiff using the
+    archives of three versions of the Gnu C compiler, gcc-2.95.1.tar,
+    gcc-2.95.2.tar and gcc-2.95.3.tar. The experiments were done on an
+    SGI-MIPS3, 400MHZ. Gzip was used at its default compression level.
+    Vcdiff timings were done using the Vcodex/Vcdiff software (Section 13).
+    As string and window matching typically dominates the computation during
+    compression, the Vcdiff compression times were directly due to the
+    algorithms used in the Vcodex/Vcdiff software. However, the decompression
+    times should be generic and representative of any good implementation
+    of the Vcdiff data format. Timing was done by running each program
+    three times and taking the average of the total cpu+system times.
+
+    Below are the different Vcdiff runs:
+
+	Vcdiff: vcdiff is used as compressor only.
+
+	Vcdiff-d: vcdiff is used as a differencer only. That is, it only
+		compares target data against source data.  Since the files
+		involved are large, they are broken into windows. In this
+		case, each target window starting at some file offset in
+		the target file is compared against a source window with
+		the same file offset (in the source file). The source
+		window is also slightly larger than the target window
+		to increase matching opportunities. The -d option also gives
+		a hint to the string matching algorithm of Vcdiff that
+		the two files are very similar with long stretches of matches.
+		The algorithm takes advantage of this to minimize its
+		processing of source data and save time.
+
+	Vcdiff-dc: This is similar to Vcdiff-d but vcdiff can also compare
+		target data against target data as applicable. Thus, vcdiff
+		both computes differences and compresses data. The windowing
+		algorithm is the same as above. However, the above hint is
+		rescinded in this case.
+
+	Vcdiff-dcs: This is similar to Vcdiff-dc but the windowing algorithm
+		uses a content-based heuristic to select source data segments
+		that are more likely to match with a given target window.
+		Thus, the source data segment selected for a target window
+		often will not be aligned with the file offsets of this
+		target window.
+
+
+                gcc-2.95.1    gcc-2.95.2    compression   decompression
+    raw size      55746560      55797760
+    compress         -          19939390       13.85s	      7.09s
+    gzip             -          12973443       42.99s         5.35s
+    Vcdiff           -          15358786       20.04s         4.65s
+    Vcdiff-d         -            100971       10.93s         1.92s
+    Vcdiff-dc        -             97246       20.03s         1.84s
+    Vcdiff-dcs       -            256445       44.81s         1.84s
+
+		TABLE 1. Compressing gcc-2.95.2.tar given gcc-2.95.1
+
+
+    TABLE 1 shows the raw sizes of gcc-2.95.1.tar and gcc-2.95.2.tar and the
+    sizes of the compressed results. As a pure compressor, the compression
+    rate for Vcdiff is worse than gzip and better than compress. The last
+    three rows show that when two file versions are very similar, differencing
+    can have dramatically good compression rates. Vcdiff-d and Vcdiff-dc use
+    the same simple window selection method but Vcdiff-dc also does compression
+    so its output is slightly smaller. Vcdiff-dcs uses a heuristic based on
+    data content to search for source data that likely will match a given target
+    window. Although it does a good job, the heuristic did not always find the
+    best matches which are given by the simple algorithm of Vcdiff-d.  As a
+    result, the output size is slightly larger. Note also that there is a large
+    cost in computing matching windows this way. Finally, the compression time
+    of Vcdiff-d is nearly half of that of Vcdiff-dc. It is tempting to conclude
+    that the compression feature causes the additional time in Vcdiff-dc
+    relative to Vcdiff-d.  However, this is not the case. The hint given to
+    the Vcdiff string matching algorithm that the two files are likely to
+    have very long stretches of matches helps the algorithm to minimize
+    processing of the "source data", thus saving half the time. However, as we
+    shall see below when this hint is wrong, the result is even longer time.
+
+
+                gcc-2.95.2    gcc-2.95.3    compression   decompression
+    raw size      55797760      55787520
+    compress         -          19939453       13.54s	      7.00s
+    gzip             -          12998097       42.63s         5.62s
+    Vcdiff           -          15371737       20.09s         4.74s
+    Vcdiff-d         -          26383849       71.41s         6.41s
+    Vcdiff-dc        -          14461203       42.48s         4.82s
+    Vcdiff-dcs       -           1248543       61.18s         1.99s
+
+		TABLE 2. Compressing gcc-2.95.3.tar given gcc-2.95.2
+
+
+    TABLE 2 shows the raw sizes of gcc-2.95.2.tar and gcc-2.95.3.tar and
+    the sizes of the compressed results. In this case, the tar file of
+    gcc-2.95.3 is rearranged in a way that makes the straightforward method
+    of matching file offsets for source and target windows fail. As a
+    result, Vcdiff-d performs rather dismally both in time and output size.
+    The large time for Vcdiff-d is directly due to the fact that the string
+    matching algorithm has to work much harder to find matches when the hint
+    that two files have long matching stretches fails to hold. On the other
+    hand, Vcdiff-dc does both differencing and compression resulting in good
+    output size. Finally, the window searching heuristic used in Vcdiff-dcs is
+    effective in finding the right matching source windows for target windows
+    resulting in a small output size. This shows why the data format needs to
+    have a way to specify matching windows to gain performance. Finally,
+    we note that the decoding times are always good regardless of how
+    the string matching or window searching algorithms perform.
+
+
+9. FURTHER ISSUES
+
+    This document does not address a few issues:
+
+    Secondary compressors:
+        As discussed in Section 4.3, certain sections in the delta encoding
+	of a window may be further compressed by a secondary compressor.
+	In our experience, the basic Vcdiff format is adequate for most
+	purposes so that secondary compressors are seldom needed. In
+        particular, for normal use of data differencing where the files to
+	be compared have long stretches of matches, much of the gain in
+	compression rate is already achieved by normal string matching.
+	Thus, the use of secondary compressors is seldom needed in this case.
+	However, for applications beyond differencing of such nearly identical
+	files, secondary compressors may be needed to achieve maximal
+	compressed results.
+
+        Therefore, we recommend leaving the Vcdiff data format defined
+	as in this document so that the use of secondary compressors
+ 	can be implemented when they become needed in the future.
+        The formats of the compressed data via such compressors or any
+	compressors that may be defined in the future are left open to
+	their implementations.  These could include Huffman encoding,
+	arithmetic encoding, and splay tree encoding [8,9].
+
+    Large file system vs. small file system:
+	As discussed in Section 4, a target window in a large file may be
+	compared against some source window in another file or in the same
+	file (from some earlier part). In that case, the file offset of the
+	source window is specified as a variable-sized integer in the delta
+	encoding. There is a possibility that the encoding was computed on
+	a system supporting much larger files than in a system where
+	the data may be decoded (e.g., 64-bit file systems vs. 32-bit file
+	systems). In that case, some target data may not be recoverable.
+	This problem could afflict any compression format, and ought
+	to be resolved with a generic negotiation mechanism in the
+	appropriate protocol(s).
+
+
+10.  SUMMARY
+
+    We have described Vcdiff, a general and portable encoding format for
+    compression and differencing. The format is good in that it allows
+    implementing a decoder without knowledge of the encoders. Further,
+    ignoring the use of secondary compressors not defined within the format,
+    the decoding algorithm runs in linear time and requires working space
+    proportional to window sizes.
+
+
+
+11. ACKNOWLEDGEMENTS
+
+    Thanks are due to Balachander Krishnamurthy, Jeff Mogul and Arthur Van Hoff
+    who provided much encouragement to publicize Vcdiff. In particular, Jeff
+    helped clarify the description of the data format presented here.
+
+
+
+12. SECURITY CONSIDERATIONS
+
+    Vcdiff only provides a format to encode compressed and differenced data.
+    It does not address any issues concerning how such data are, in fact,
+    stored in a given file system or the run-time memory of a computer system.
+    Therefore, we do not anticipate any security issues with respect to Vcdiff.
+
+
+
+13. SOURCE CODE AVAILABILITY
+
+    Vcdiff is implemented as a data transforming method in Phong Vo's
+    Vcodex library. AT&T Corp. has made the source code for Vcodex available
+    for anyone to use to transmit data via HTTP/1.1 Delta Encoding [10,11].
+    The source code and accompanying license are accessible at the URL below:
+
+          http://www.research.att.com/sw/tools
+
+
+14. INTELLECTUAL PROPERTY RIGHTS
+
+   The IETF has been notified of intellectual property rights claimed in
+   regard to some or all of the specification contained in this
+   document.  For more information consult the online list of claimed
+   rights, at <http://www.ietf.org/ipr.html>.
+
+   The IETF takes no position regarding the validity or scope of any
+   intellectual property or other rights that might be claimed to
+   pertain to the implementation or use of the technology described in
+   this document or the extent to which any license under such rights
+   might or might not be available; neither does it represent that it
+   has made any effort to identify any such rights.  Information on the
+   IETF's procedures with respect to rights in standards-track and
+   standards-related documentation can be found in BCP-11.  Copies of
+   claims of rights made available for publication and any assurances of
+   licenses to be made available, or the result of an attempt made to
+   obtain a general license or permission for the use of such
+   proprietary rights by implementors or users of this specification can
+   be obtained from the IETF Secretariat.
+
+
+
+15. IANA CONSIDERATIONS
+
+   The Internet Assigned Numbers Authority (IANA) administers the number
+   space for Secondary Compressor ID values.  Values and their meaning
+   must be documented in an RFC or other peer-reviewed, permanent, and
+   readily available reference, in sufficient detail so that
+   interoperability between independent implementations is possible.
+   Subject to these constraints, name assignments are First Come, First
+   Served - see RFC2434 [13].  Legal ID values are in the range 1..255.
+
+   This document does not define any values in this number space.
+
+
+16. REFERENCES
+
+    [1] D.G. Korn and K.P. Vo, Vdelta: Differencing and Compression,
+        Practical Reusable Unix Software, Editor B. Krishnamurthy,
+        John Wiley & Sons, Inc., 1995.
+
+    [2] J. Ziv and A. Lempel, A Universal Algorithm for Sequential Data
+        Compression, IEEE Trans. on Information Theory, 23(3):337-343, 1977.
+
+    [3] W. Tichy, The String-to-String Correction Problem with Block Moves,
+        ACM Transactions on Computer Systems, 2(4):309-321, November 1984.
+
+    [4] E.M. McCreight, A Space-Economical Suffix Tree Construction
+        Algorithm, Journal of the ACM, 23:262-272, 1976.
+
+    [5] J.J. Hunt, K.P. Vo, W. Tichy, An Empirical Study of Delta Algorithms,
+        IEEE Software Configuration and Maintenance Workshop, 1996.
+
+    [6] J.J. Hunt, K.P. Vo, W. Tichy, Delta Algorithms: An Empirical Analysis,
+        ACM Trans. on Software Engineering and Methodology, 7:192-214, 1998.
+
+    [7] D.G. Korn, K.P. Vo, Sfio: A buffered I/O Library,
+        Proc. of the Summer '91 Usenix Conference, 1991.
+
+    [8] D. W. Jones, Application of Splay Trees to Data Compression,
+        CACM, 31(8):996-1007, 1988.
+
+    [9] M. Nelson, J. Gailly, The Data Compression Book, ISBN 1-55851-434-1,
+        M&T Books, New York, NY, 1995.
+
+   [10] J.C. Mogul, F. Douglis, A. Feldmann, and B. Krishnamurthy,
+        Potential benefits of delta encoding and data compression for HTTP,
+        SIGCOMM '97, Cannes, France, 1997.
+
+   [11] J.C. Mogul, B. Krishnamurthy, F. Douglis, A. Feldmann,
+        Y. Goland, and A. Van Hoff, Delta Encoding in HTTP,
+        IETF, draft-mogul-http-delta-10, 2001.
+
+   [12] S. Bradner, Key words for use in RFCs to Indicate Requirement Levels,
+        RFC 2119, March 1997.
+
+   [13] T. Narten, H. Alvestrand, Guidelines for Writing an IANA
+        Considerations Section in RFCs, RFC2434, October 1998.
+
+
+
+17. AUTHORS' ADDRESSES
+
+    Kiem-Phong Vo (main contact)
+    AT&T Labs, Room D223
+    180 Park Avenue
+    Florham Park, NJ 07932
+    Email: kpv@research.att.com
+    Phone: 1 973 360 8630
+
+    David G. Korn
+    AT&T Labs, Room D237
+    180 Park Avenue
+    Florham Park, NJ 07932
+    Email: dgk@research.att.com
+    Phone: 1 973 360 8602
+
+    Jeffrey C. Mogul
+    Western Research Laboratory
+    Compaq Computer Corporation
+    250 University Avenue
+    Palo Alto, California, 94305, U.S.A.
+    Email: JeffMogul@acm.org
+    Phone: 1 650 617 3304 (email preferred)
+
+    Joshua P. MacDonald
+    Computer Science Division
+    University of California, Berkeley
+    345 Soda Hall
+    Berkeley, CA 94720
+    Email: jmacd@cs.berkeley.edu
diff --git a/xdelta3/junk.py b/xdelta3/junk.py
new file mode 100755
index 0000000..384951e
--- /dev/null
+++ b/xdelta3/junk.py
@@ -0,0 +1,11 @@
+#!/usr/bin/python
+
+bytes = ''
+
+for x in range(0, 250):
+  bytes = bytes + ('%c%c%c%c=' % (x, x+1, x+2, x+3))
+
+for x in range(0, 250):
+  bytes = bytes + ('%c' % x)
+  
+print bytes
diff --git a/xdelta3/linkxd3lib.c b/xdelta3/linkxd3lib.c
new file mode 100755
index 0000000..d605fa6
--- /dev/null
+++ b/xdelta3/linkxd3lib.c
@@ -0,0 +1,47 @@
+#include "xdelta3.h"
+
+extern int VVV;
+
+int VVV;
+
+/* Sink for return values: stores R in the global VVV so that every
+ * call made in main() has a visible side effect (presumably so the
+ * calls cannot be discarded by the compiler -- confirm).
+ */
+void use(int r)
+{
+  VVV = r;
+}
+
+/* References a set of xdelta3 library entry points, feeding each
+ * result to use().  NOTE(review): judging by the placeholder
+ * arguments (NULL buffers, an uninitialised `source') this looks
+ * like a link check rather than a program meant to be executed --
+ * confirm before running it.
+ */
+int main() {
+  xd3_config config;
+  xd3_stream stream;
+  xd3_source source;
+
+  xd3_init_config (& config, 0);
+  use (xd3_config_stream (&stream, &config));
+  use (xd3_close_stream (&stream));
+  xd3_abort_stream (&stream);
+  xd3_free_stream (&stream);
+
+  xd3_avail_input (& stream, NULL, 0);
+  xd3_consume_output (& stream);
+
+  use (xd3_bytes_on_srcblk (& source, 0));
+  use (xd3_set_source (& stream, & source));
+  xd3_set_flags (& stream, 0);
+
+  use (xd3_decode_completely (& stream, NULL, 0, NULL, NULL, 0));
+  use (xd3_decode_input (&stream));
+  use (xd3_decoder_needs_source (& stream));
+  use (xd3_get_appheader (& stream, NULL, NULL));
+
+  /* These two return pointers.  Casting a pointer to int truncates
+   * where pointers are wider than int and draws a compiler
+   * diagnostic, so hand use() the result of a NULL test instead; the
+   * functions are still referenced and called.
+   */
+  use (xd3_errstring (& stream) != NULL);
+  use (xd3_strerror (0) != NULL);
+
+#if XD3_ENCODER
+  use (xd3_encode_input (&stream));
+  use (xd3_encode_completely (& stream, NULL, 0, NULL, NULL, 0));
+  use (xd3_set_appheader (& stream));
+  use (xd3_encoder_used_source (& stream));
+  use (xd3_encoder_srcbase (& stream));
+  use (xd3_encoder_srclen (& stream));
+#endif
+  return 0;
+}
diff --git a/xdelta3/rcs_junk.cc b/xdelta3/rcs_junk.cc
new file mode 100755
index 0000000..ac49644
--- /dev/null
+++ b/xdelta3/rcs_junk.cc
@@ -0,0 +1,1861 @@
+typedef struct _RcsWalker               RcsWalker;
+typedef struct _RcsFile                 RcsFile;
+typedef struct _RcsVersion              RcsVersion;
+typedef struct _RcsStats                RcsStats;
+typedef struct _IntStat                 IntStat;
+typedef struct _DblStat                 DblStat;
+typedef struct _BinCounter              BinCounter;
+typedef struct _ConfigOption            ConfigOption;
+
+/* Callbacks and limits that drive rcswalk().  Every function pointer
+ * is tested for NULL before it is called, so each hook is optional.
+ * A hook that returns non-zero stops the walk with that value.
+ */
+struct _RcsWalker {
+  /* Called once before the walk; the result is handed to the other hooks as `data'. */
+  void*    (* initialize)    (void);
+  /* Called once after the whole directory tree has been walked. */
+  int      (* finalize)      (RcsStats* stats, void* data);
+  /* Called once per processed RCS file, after its statistics were recorded. */
+  int      (* onefile)       (RcsFile* rcs, RcsStats* stats, void* data);
+  /* Called for each version, in date order, with the version checked out. */
+  int      (* dateorder)     (RcsFile* rcs, RcsVersion* v, void* data);
+  /* Called for each parent/child pair of the version tree.  When both
+   * are on the trunk `from' is the child and `to' the parent;
+   * otherwise `from' is the parent and `to' the child. */
+  int      (* delta_orig)    (RcsFile* rcs, RcsVersion* from, RcsVersion *to, void* data);
+  /* Called for each pair of date-adjacent versions; `from' is the
+   * entry that follows `to' in the date-sorted array. */
+  int      (* delta_date)    (RcsFile* rcs, RcsVersion* from, RcsVersion *to, void* data);
+  /* Files with fewer than min_versions or more than max_versions
+   * revisions are counted and skipped. */
+  int      min_versions;
+  int      max_versions;
+  /* When set, each checkout is also written to a temporary file. */
+  gboolean write_files;
+};
+
+/* One revision of an RCS file. */
+struct _RcsVersion {
+  RcsFile    *rcs;           /* owning file; set while the graph is built */
+  time_t      date;          /* parsed from the rlog "date:" field; bumped
+                              * so that it is later than the parent's */
+  int         dateseq;       /* index of this version in versions_date */
+  int         chain_length;  /* -1 until computed.  0 for the head; along
+                              * the head's ancestors it grows by one per
+                              * step; elsewhere it is the parent's value
+                              * plus one */
+  char       *vname;         /* revision number as printed by rlog */
+  off_t       size;          /* byte count of the last checkout */
+  int         cc;            /* number of children */
+  guint8*     segment;       /* checked-out contents, NULL when not loaded */
+  char       *filename;      /* temporary file holding the contents when
+                              * write_files is set, otherwise NULL */
+  RcsVersion *parent;
+  GSList     *children;      /* list of RcsVersion* */
+  guint       on_trunk : 1;  /* set when vname contains exactly one '.' */
+};
+
+/* Everything known about one RCS (",v") file. */
+struct _RcsFile {
+  char       *filename;   /* private copy of the RCS file path */
+  char       *copyname;   /* supplied by the caller; rcswalk_free() does
+                           * not release it */
+  char       *headname;   /* revision named on rlog's "head:" line */
+
+  int         version_count;  /* from rlog's "total revisions:" line */
+  int         forward_count;  /* parent/child pairs not both on the trunk */
+  int         reverse_count;  /* parent/child pairs both on the trunk */
+  int         branch_count;   /* sum of (children - 1) over versions with
+                               * more than one child */
+
+  RcsVersion *versions;        /* version_count entries, in rlog order */
+  RcsVersion **versions_date;  /* the same versions sorted by date */
+
+  RcsVersion *head_version;    /* version whose name equals headname */
+  RcsVersion *root_version;    /* version for which no parent was found */
+
+  off_t       total_size;      /* sum of the versions' sizes */
+
+  /* NOTE(review): not referenced in the code visible here. */
+  guint       atflag : 1;
+};
+
+/* Statistics accumulated over a whole walk.  The IntStat members
+ * receive one sample per processed RCS file.
+ */
+struct _RcsStats {
+  BinCounter *avg_version_size;  /* version sizes, binned by date index */
+  IntStat* version_stat;         /* number of versions */
+  IntStat* forward_stat;         /* forward_count of the file */
+  IntStat* reverse_stat;         /* reverse_count of the file */
+  IntStat* branch_stat;          /* branch_count of the file */
+  IntStat* unencoded_stat;       /* total size of all versions */
+  IntStat* literal_stat;         /* size of the largest version */
+};
+
+/* Running summary of a series of integer samples. */
+struct _IntStat {
+  const char* name;   /* label; also used to build the report file names */
+  int count;          /* number of samples added */
+  long long sum;
+  long long min;
+  long long max;
+
+  GArray *values;     /* every sample, stored as long long */
+};
+
+/* Running summary of a series of floating-point samples. */
+struct _DblStat {
+  const char* name;   /* label; also used to build the report file names */
+  int count;          /* number of samples added */
+  double sum;
+  double min;
+  double max;
+
+  GArray *values;     /* every sample, stored as double */
+};
+
+struct _BinCounter {
+  const char *name;
+  GPtrArray  *bins;
+};
+
+enum _ConfigArgument {
+  CO_Required,
+  CO_Optional,
+  CO_None
+};
+
+typedef enum _ConfigArgument ConfigArgument;
+
+enum _ConfigOptionType {
+  CD_Bool,
+  CD_Int32,
+  CD_Double,
+  CD_String
+};
+
+typedef enum _ConfigOptionType ConfigOptionType;
+
+enum _ConfigStyle {
+  CS_Ignore,
+  CS_UseAsFile,
+  CS_Use
+};
+
+typedef enum _ConfigStyle ConfigStyle;
+
+struct _ConfigOption {
+  const char       *name;
+  const char       *abbrev;
+  ConfigStyle       style;
+  ConfigArgument    arg;
+  ConfigOptionType  type;
+  void             *value;
+  gboolean          found;
+};
+
+/* RCS inspection stuff
+ */
+
+void                rcswalk_init   (void);
+int            rcswalk        (RcsWalker *walker, const char* copy_base);
+void                rcswalk_report (RcsStats* stats);
+
+IntStat*            stat_int_new      (const char* name);
+void                stat_int_add_item (IntStat* stat, long long v);
+void                stat_int_report   (IntStat* stat);
+
+DblStat*            stat_dbl_new      (const char* name);
+void                stat_dbl_add_item (DblStat* stat, double v);
+void                stat_dbl_report   (DblStat* stat);
+
+BinCounter*         stat_bincount_new      (const char* name);
+void                stat_bincount_add_item (BinCounter* bc, int bin, double val);
+void                stat_bincount_report   (BinCounter* bc);
+
+/* Experiment configuration stuff
+ */
+
+void                config_register   (ConfigOption *opts, int nopts);
+int            config_parse      (const char* config_file);
+int            config_done       (void);
+void                config_help       (void);
+void                config_set_string (const char* var, const char* val);
+int            config_clear_dir  (const char* dir);
+int            config_create_dir (const char* dir);
+FILE*               config_output     (const char* fmt, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+#include "rcswalk.h"
+#include "edsio.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <math.h>
+
+#undef BUFSIZE
+#define BUFSIZE (1<<14)
+
+char       *tmp_file_1;
+gboolean    tmp_file_1_free = TRUE;
+char       *tmp_file_2;
+gboolean    tmp_file_2_free = TRUE;
+
+int         skip_count;
+int         small_count;
+int         large_count;
+int         process_count;
+
+extern time_t str2time (char const *, time_t, long);
+
+static guint8 readbuf[BUFSIZE];
+
+static const char* rcswalk_input_dir = NULL;
+static const char* config_output_base = NULL;
+static const char* config_output_dir = NULL;
+static const char* rcswalk_experiment = NULL;
+
+static ConfigOption rcswalk_options[] = {
+  { "rcswalk_experiment", "ex", CS_Use,       CO_Required, CD_String, & rcswalk_experiment },
+  { "rcs_input_dir",      "id", CS_UseAsFile, CO_Required, CD_String, & rcswalk_input_dir }
+};
+
+static ConfigOption config_options[] = {
+  { "config_output_base", "ob", CS_Ignore, CO_Required, CD_String, & config_output_base }
+};
+
+
+/* Release the checked-out contents of V.  If V was using one of the
+ * two shared temporary files that file is marked free again; any
+ * other filename is treated as heap memory and freed.
+ */
+void
+rcswalk_free_segment (RcsVersion *v)
+{
+  char *name = v->filename;
+
+  /* g_free() accepts NULL, so the buffer needs no guard. */
+  g_free (v->segment);
+  v->segment = NULL;
+
+  if (name == tmp_file_1)
+    tmp_file_1_free = TRUE;
+  else if (name == tmp_file_2)
+    tmp_file_2_free = TRUE;
+  else
+    g_free (name);
+
+  v->filename = NULL;
+}
+
+/* Check out revision V of RCS by running `co -p' and capturing its
+ * standard output in a freshly allocated v->segment; v->size receives
+ * the number of bytes read.  When walker->write_files is set the
+ * contents are also written to whichever of the two temporary files
+ * is currently free, and v->filename is pointed at it.
+ *
+ * V must not already hold a segment.  Returns 0 on success and a
+ * non-zero value (errno when available, otherwise -1) on failure.
+ *
+ * NOTE(review): the revision and file names are pasted into a shell
+ * command unquoted; names containing shell metacharacters will not
+ * work as intended.
+ */
+int
+rcswalk_checkout (RcsFile* rcs, RcsWalker* walker, RcsVersion *v)
+{
+  FILE* out;
+  char* cmd;
+  size_t nread;
+  int alloc = BUFSIZE;
+  int pos = 0;
+  int err;
+
+  g_assert (! v->segment);
+
+  /* Build the command on the heap: a fixed-size buffer could be
+   * overrun by a long path name.
+   */
+  cmd = g_strdup_printf ("co -ko -p%s %s 2>/dev/null\n", v->vname, rcs->filename);
+
+  out = popen (cmd, "r");
+
+  if (! out)
+    {
+      /* Save errno first: the logging call may change it. */
+      err = errno;
+      g_warning ("popen failed: %s: %s", cmd, g_strerror (err));
+      g_free (cmd);
+      return err ? err : -1;
+    }
+
+  g_free (cmd);
+
+  v->segment = g_malloc (alloc);
+
+  while ((nread = fread (readbuf, 1, BUFSIZE, out)) > 0)
+    {
+      /* One doubling always suffices: alloc starts at BUFSIZE and a
+       * single read never returns more than BUFSIZE bytes.
+       */
+      if (pos + (int) nread > alloc)
+        {
+          alloc *= 2;
+          v->segment = g_realloc (v->segment, alloc);
+        }
+
+      memcpy (v->segment + pos, readbuf, nread);
+
+      pos += (int) nread;
+    }
+
+  /* fread() never returns a negative count; a short or empty read
+   * must be told apart from end-of-file with ferror().
+   */
+  if (ferror (out))
+    {
+      err = errno;
+      g_warning ("fread failed: %s", g_strerror (err));
+      pclose (out);
+      return err ? err : -1;
+    }
+
+  if (pclose (out) < 0)
+    {
+      err = errno;
+      g_warning ("pclose failed");
+      return err ? err : -1;
+    }
+
+  v->size = pos;
+
+  if (walker->write_files)
+    {
+      char* file = NULL;
+
+      if (! file && tmp_file_1_free)
+        {
+          file = tmp_file_1;
+          tmp_file_1_free = FALSE;
+        }
+
+      if (! file && tmp_file_2_free)
+        {
+          file = tmp_file_2;
+          tmp_file_2_free = FALSE;
+        }
+
+      g_assert (file);
+
+      v->filename = file;
+
+      if (! (out = fopen (file, "w")))
+        {
+          err = errno;
+          g_warning ("fopen failed: %s\n", file);
+          return err ? err : -1;
+        }
+
+      /* Count bytes rather than one item of v->size bytes: fwrite()
+       * returns 0 whenever either count is 0, so the old form
+       * reported a failure for every empty revision.
+       */
+      if (fwrite (v->segment, 1, v->size, out) != (size_t) v->size)
+        {
+          err = errno;
+          g_warning ("fwrite failed: %s\n", file);
+          fclose (out);
+          return err ? err : -1;
+        }
+
+      if (fclose (out) != 0)
+        {
+          err = errno;
+          g_warning ("fclose failed: %s\n", file);
+          return err ? err : -1;
+        }
+    }
+
+  return 0;
+}
+
+/* Walk the versions of RCS in date order and hand every adjacent
+ * pair to walker->delta_date, with `from' being the entry that
+ * follows `to' in versions_date.  A segment that is still loaded
+ * from the previous round is reused.  Returns the first non-zero
+ * result of a checkout or callback, otherwise 0.
+ */
+int
+rcswalk_delta_date (RcsFile* rcs, RcsWalker* walker, void* data)
+{
+  RcsVersion *from = NULL;
+  RcsVersion *to = NULL;
+  int last = rcs->version_count - 1;
+  int idx = 0;
+  int ret;
+
+  while (idx < last)
+    {
+      to = rcs->versions_date[idx];
+      from = rcs->versions_date[idx + 1];
+
+      if (! to->segment)
+        {
+          ret = rcswalk_checkout (rcs, walker, to);
+
+          if (ret)
+            return ret;
+        }
+
+      ret = rcswalk_checkout (rcs, walker, from);
+
+      if (ret)
+        return ret;
+
+      ret = walker->delta_date (rcs, from, to, data);
+
+      if (ret)
+        return ret;
+
+      rcswalk_free_segment (to);
+
+      idx += 1;
+    }
+
+  if (from != NULL)
+    rcswalk_free_segment (from);
+
+  if (to != NULL)
+    rcswalk_free_segment (to);
+
+  return 0;
+}
+
+/* Depth-first walk of the version tree below VERSION.  For every
+ * parent/child pair both versions are checked out and passed to
+ * walker->delta_orig; when both lie on the trunk the child is passed
+ * as `from' and the parent as `to', otherwise the other way round.
+ * *COUNT is incremented once per pair.  Returns the first non-zero
+ * result encountered, otherwise 0.
+ */
+int
+rcswalk_delta_orig (RcsFile* rcs, RcsWalker* walker, RcsVersion* version, int *count, void* data)
+{
+  GSList *link = version->children;
+  int ret;
+
+  while (link != NULL)
+    {
+      RcsVersion *child = link->data;
+      RcsVersion *from;
+      RcsVersion *to;
+
+      /* The parent is released before descending, so it may have to
+       * be fetched again for the next child.
+       */
+      if (! version->segment)
+        {
+          ret = rcswalk_checkout (rcs, walker, version);
+
+          if (ret)
+            return ret;
+        }
+
+      ret = rcswalk_checkout (rcs, walker, child);
+
+      if (ret)
+        return ret;
+
+      if (version->on_trunk && child->on_trunk)
+        {
+          from = child;
+          to = version;
+        }
+      else
+        {
+          from = version;
+          to = child;
+        }
+
+      *count += 1;
+
+      ret = walker->delta_orig (rcs, from, to, data);
+
+      if (ret)
+        return ret;
+
+      rcswalk_free_segment (version);
+
+      ret = rcswalk_delta_orig (rcs, walker, child, count, data);
+
+      if (ret)
+        return ret;
+
+      link = link->next;
+    }
+
+  rcswalk_free_segment (version);
+  return 0;
+}
+
+/* Check out every version of RCS in date order, record its size in
+ * the per-index size bins and pass it to walker->dateorder.  Returns
+ * the first non-zero result of a checkout or callback, otherwise 0.
+ */
+int
+rcswalk_dateorder (RcsFile* rcs, RcsWalker *walker, RcsStats *stats, void* data)
+{
+  int idx = 0;
+  int ret;
+
+  while (idx < rcs->version_count)
+    {
+      RcsVersion *cur = rcs->versions_date[idx];
+
+      ret = rcswalk_checkout (rcs, walker, cur);
+
+      if (ret)
+        return ret;
+
+      stat_bincount_add_item (stats->avg_version_size, idx, cur->size);
+
+      ret = walker->dateorder (rcs, cur, data);
+
+      if (ret)
+        return ret;
+
+      rcswalk_free_segment (cur);
+
+      idx += 1;
+    }
+
+  return 0;
+}
+
+/* If the text at *LINE_P starts with STR, advance *LINE_P past it
+ * and return TRUE; otherwise leave *LINE_P alone and return FALSE.
+ */
+gboolean
+rcswalk_match (char** line_p, char* str)
+{
+  size_t prefix_len = strlen (str);
+
+  if (strncmp (*line_p, str, prefix_len) != 0)
+    return FALSE;
+
+  *line_p += prefix_len;
+  return TRUE;
+}
+
+/* Work out the parent of version V from its revision number alone,
+ * using HASH (revision name -> RcsVersion*) to test which candidate
+ * names exist.  On success v->parent is set and V is prepended to the
+ * parent's children list.  When no parent can exist V is recorded as
+ * rcs->root_version instead.  Also sets v->on_trunk when the name has
+ * exactly one '.'.  Aborts if the name contains no '.' at all.
+ */
+void
+rcswalk_find_parent (RcsFile *rcs, GHashTable* hash, RcsVersion *v)
+{
+  char *lastdot;
+  char  mbuf[1024];
+  int   lastn;
+  RcsVersion *p;
+
+  /* NOTE(review): unbounded copy; relies on vname being shorter than
+   * mbuf -- confirm against how vname is produced.
+   */
+  strcpy (mbuf, v->vname);
+
+  if (! (lastdot = strchr (mbuf, '.')))
+    abort ();
+
+  /* No second '.' means a two-component name such as "1.5". */
+  if (! (lastdot = strchr (lastdot+1, '.')))
+    v->on_trunk = TRUE;
+
+  lastdot = strrchr (mbuf, '.');
+  lastn = atoi (lastdot + 1);
+
+  /* mbuf holds the candidate parent name; each round rewrites it in
+   * place and the loop ends once the candidate is found in HASH.
+   */
+  do
+    {
+      if (lastn == 1)
+	{
+	  /* First revision of its line: drop the trailing ".1". */
+	  (*lastdot) = 0;
+
+	  if (strcmp (mbuf, "1") == 0)
+	    {
+	      /* Assuming the first version is always "1.1".
+	       */
+	      rcs->root_version = v;
+	      return;
+	    }
+	  else if (! (lastdot = strrchr (mbuf, '.')))
+	    {
+	      int i = 1;
+	      int br = atoi (mbuf) - 1;
+	      RcsVersion *p2 = NULL;
+
+	      /* Now we have something like "2.1" and need to
+	       * search for the highest "1.x" version.
+	       */
+
+	      /* Probes "br.1", "br.2", ... and stops at the first name
+	       * that is missing; p ends up as the last one found, or
+	       * NULL when even "br.1" does not exist.
+	       */
+	      do
+		{
+		  sprintf (mbuf, "%d.%d", br, i++);
+		  p = p2;
+		}
+	      while ((p2 = g_hash_table_lookup (hash, mbuf)));
+
+	      if (p == NULL)
+		{
+		  rcs->root_version = v;
+		  return;
+		}
+
+	      /* Leaves the outer loop with p already set. */
+	      break;
+	    }
+	  else
+	    {
+	      /* 1.2.3.1 => 1.2 */
+	      (*lastdot) = 0;
+	      lastdot = strrchr (mbuf, '.');
+	      lastn = atoi (lastdot + 1);
+	    }
+	}
+      else
+	{
+	  /* Try the previous revision on the same line. */
+	  lastn -= 1;
+	  sprintf (lastdot, ".%d", lastn);
+	}
+    }
+  while (! (p = g_hash_table_lookup (hash, mbuf)));
+
+  g_assert (p);
+
+  v->parent = p;
+
+  p->children = g_slist_prepend (p->children, v);
+}
+
+/* Recursive pass over the version tree rooted at VERSION.  Records
+ * the child count, updates the file's branch/forward/reverse
+ * counters, forces each version's date to be later than its parent's
+ * and assigns chain_length to the head and to every ancestor of the
+ * head.
+ *
+ * Returns chain_length + 1 when VERSION is the head or an ancestor of
+ * it, and -1 otherwise.
+ */
+int
+rcswalk_traverse_graph (RcsFile* rcs, RcsVersion* version, RcsVersion *parent)
+{
+  GSList *link;
+  int best = -1;
+
+  version->cc = g_slist_length (version->children);
+
+  if (version->cc > 1)
+    rcs->branch_count += (version->cc - 1);
+
+  if (parent != NULL)
+    {
+      /* Insure that there is proper date ordering. */
+      if (version->date <= parent->date)
+        version->date = parent->date + 1;
+
+      if (parent->on_trunk && version->on_trunk)
+        rcs->reverse_count += 1;
+      else
+        rcs->forward_count += 1;
+    }
+
+  for (link = version->children; link != NULL; link = link->next)
+    {
+      int sub = rcswalk_traverse_graph (rcs, link->data, version);
+
+      if (sub > best)
+        best = sub;
+    }
+
+  if (version == rcs->head_version)
+    best = 0;
+
+  if (best < 0)
+    return -1;
+
+  version->chain_length = best;
+
+  return best + 1;
+}
+
+/* Second pass over the version tree: any version whose chain_length
+ * is still negative gets its parent's value plus one.  The root
+ * (PARENT == NULL) must already have a non-negative chain_length.
+ */
+void
+rcswalk_compute_chain_length (RcsFile* rcs, RcsVersion* version, RcsVersion *parent)
+{
+  GSList *link = version->children;
+
+  if (parent != NULL)
+    {
+      if (version->chain_length < 0)
+        version->chain_length = parent->chain_length + 1;
+    }
+  else
+    {
+      g_assert (version->chain_length >= 0);
+    }
+
+  while (link != NULL)
+    {
+      rcswalk_compute_chain_length (rcs, link->data, version);
+      link = link->next;
+    }
+}
+
+/* qsort() comparator ordering RcsVersion pointers by ascending date.
+ * A and B each point at an element of an RcsVersion* array.
+ *
+ * The dates are compared rather than subtracted: the difference of
+ * two time_t values need not fit in an int, and a truncated
+ * difference can have the wrong sign.
+ */
+int
+rcswalk_date_compare (const void* a, const void* b)
+{
+  const RcsVersion *const *ra = (const RcsVersion *const *) a;
+  const RcsVersion *const *rb = (const RcsVersion *const *) b;
+  time_t da = (*ra)->date;
+  time_t db = (*rb)->date;
+
+  return (da > db) - (da < db);
+}
+
+/* Turn the flat version array of RCS into a tree: index the versions
+ * by name, link each one to its parent, locate the head, compute the
+ * counters and chain lengths, and fill versions_date with the
+ * versions sorted by date (each version's dateseq is its position).
+ *
+ * Returns 0 on success.  Returns -1, after a warning, when a version
+ * has no name or when no root or head version can be identified;
+ * continuing in those cases would dereference a NULL pointer or trip
+ * an assertion.
+ */
+int
+rcswalk_build_graph (RcsFile* rcs)
+{
+  GHashTable* hash = g_hash_table_new (g_str_hash, g_str_equal);
+  int i;
+
+  for (i = 0; i < rcs->version_count; i += 1)
+    {
+      /* A NULL key would crash the string hash function. */
+      if (! rcs->versions[i].vname)
+        {
+          g_warning ("rcswalk: unnamed revision in %s", rcs->filename);
+          g_hash_table_destroy (hash);
+          return -1;
+        }
+
+      g_hash_table_insert (hash, rcs->versions[i].vname, rcs->versions + i);
+    }
+
+  for (i = 0; i < rcs->version_count; i += 1)
+    {
+      RcsVersion *v = rcs->versions + i;
+
+      v->chain_length = -1;
+      v->rcs = rcs;
+
+      rcswalk_find_parent (rcs, hash, v);
+    }
+
+  rcs->head_version = g_hash_table_lookup (hash, rcs->headname);
+
+  if (! rcs->root_version || ! rcs->head_version)
+    {
+      g_warning ("rcswalk: cannot find root or head revision in %s", rcs->filename);
+      g_hash_table_destroy (hash);
+      return -1;
+    }
+
+  rcswalk_traverse_graph (rcs, rcs->root_version, NULL);
+
+  rcswalk_compute_chain_length (rcs, rcs->root_version, NULL);
+
+  for (i = 0; i < rcs->version_count; i += 1)
+    rcs->versions_date[i] = rcs->versions + i;
+
+  qsort (rcs->versions_date, rcs->version_count, sizeof (RcsVersion*), & rcswalk_date_compare);
+
+  for (i = 0; i < rcs->version_count; i += 1)
+    {
+      RcsVersion *v = rcs->versions_date[i];
+
+      v->dateseq = i;
+    }
+
+  g_hash_table_destroy (hash);
+
+  return 0;
+}
+
+#define HEAD_STATE 0
+#define BAR_STATE 1
+#define REV_STATE 2
+#define DATE_STATE 3
+
+/* Run `rlog' on rcs->filename and parse its output into RCS: the head
+ * revision name, the announced number of revisions, and the name and
+ * date of each revision.  The parser is a small state machine:
+ *
+ *   HEAD_STATE  header lines; waits for "head: " and "total revisions: "
+ *   BAR_STATE   waits for the dashed separator before a revision
+ *   REV_STATE   waits for the "revision " line
+ *   DATE_STATE  waits for the "date: " line of that revision
+ *
+ * Files whose log cannot be used (no head, more or fewer revisions
+ * than announced) are reported on stderr, counted in skip_count and
+ * flagged through *SKIP; that is not an error and 0 is returned.
+ * Otherwise the version graph is built.  Returns 0 on success and a
+ * non-zero value on failure.
+ *
+ * NOTE(review): the file name is pasted into a shell command
+ * unquoted.
+ */
+int
+rcswalk_load (RcsFile *rcs, gboolean *skip)
+{
+  FILE* rlog;
+  char* cmd;
+  char oneline[1024], *oneline_p;
+  char rbuf[1024];
+  int version_i = 0, ret;
+  int read_state = HEAD_STATE;
+
+  /* Heap-allocated so that a long path cannot overrun a fixed buffer. */
+  cmd = g_strdup_printf ("rlog %s", rcs->filename);
+
+  if (! (rlog = popen (cmd, "r")))
+    {
+      /* Save errno first: the logging call may change it. */
+      int err = errno;
+
+      g_warning ("popen failed: %s", cmd);
+      g_free (cmd);
+      return err ? err : -1;
+    }
+
+  rcs->headname = NULL;
+
+  while (fgets (oneline, sizeof (oneline), rlog))
+    {
+      oneline_p = oneline;
+
+      if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "total revisions: "))
+        {
+          if (sscanf (oneline_p, "%d", & rcs->version_count) != 1)
+            goto badscan;
+
+          rcs->versions = g_new0 (RcsVersion, rcs->version_count);
+          rcs->versions_date = g_new (RcsVersion*, rcs->version_count);
+          read_state = BAR_STATE;
+        }
+      else if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "head: "))
+        {
+          /* rbuf is as large as oneline, so "%s" cannot overflow it. */
+          if (sscanf (oneline_p, "%s", rbuf) != 1)
+            goto badscan;
+
+          rcs->headname = g_strdup (rbuf);
+          read_state = HEAD_STATE; /* no change */
+        }
+      else if (read_state == BAR_STATE && rcswalk_match (& oneline_p, "----------------------------"))
+        {
+          read_state = REV_STATE;
+        }
+      else if (read_state == REV_STATE && rcswalk_match (& oneline_p, "revision "))
+        {
+          if (version_i >= rcs->version_count)
+            {
+              /* jkh likes to insert the rlog of one RCS file into the log
+               * message of another, and this can confuse things.  Why, oh why,
+               * doesn't rlog have an option to not print the log?
+               */
+              fprintf (stderr, "rcswalk: too many versions: skipping file %s\n", rcs->filename);
+              *skip = TRUE;
+              skip_count += 1;
+              pclose (rlog);
+              g_free (cmd);
+              return 0;
+            }
+
+          if (sscanf (oneline_p, "%s", rbuf) != 1)
+            goto badscan;
+
+          rcs->versions[version_i].vname = g_strdup (rbuf);
+          read_state = DATE_STATE;
+
+          g_assert (rcs->versions[version_i].vname);
+        }
+      else if (read_state == DATE_STATE && rcswalk_match (& oneline_p, "date: "))
+        {
+          char* semi = strchr (oneline_p, ';');
+
+          if (! semi)
+            goto badscan;
+
+          strncpy (rbuf, oneline_p, semi - oneline_p);
+
+          rbuf[semi - oneline_p] = 0;
+
+          rcs->versions[version_i].date = str2time (rbuf, 0, 0);
+
+          version_i += 1;
+          read_state = BAR_STATE;
+        }
+    }
+
+  if (! rcs->headname)
+    {
+      fprintf (stderr, "rcswalk: no head version: skipping file %s\n", rcs->filename);
+      *skip = TRUE;
+      skip_count += 1;
+      pclose (rlog);
+      g_free (cmd);
+      return 0;
+    }
+
+  /* If fewer revisions were parsed than announced, the remaining
+   * slots have no name and graph construction would crash on them;
+   * the same holds when there is no revision at all.  Treat the file
+   * as damaged.
+   */
+  if (version_i < 1 || version_i != rcs->version_count)
+    {
+      fprintf (stderr, "rcswalk: incomplete revision list: skipping file %s\n", rcs->filename);
+      *skip = TRUE;
+      skip_count += 1;
+      pclose (rlog);
+      g_free (cmd);
+      return 0;
+    }
+
+  if (pclose (rlog) < 0)
+    {
+      int err = errno;
+
+      g_warning ("pclose failed: %s", cmd);
+      g_free (cmd);
+      return err ? err : -1;
+    }
+
+  g_free (cmd);
+
+  if ((ret = rcswalk_build_graph (rcs))) {
+    return ret;
+  }
+
+  return 0;
+
+ badscan:
+
+  pclose (rlog);
+  g_free (cmd);
+
+  g_warning ("rlog syntax error");
+  return -1;
+}
+
+/* Release RCS together with its version array, the per-version names
+ * and child lists, and the file and head names.  copyname is left
+ * untouched.
+ */
+void
+rcswalk_free (RcsFile* rcs)
+{
+  int idx = 0;
+
+  while (idx < rcs->version_count)
+    {
+      RcsVersion *cur = rcs->versions + idx;
+
+      g_free (cur->vname);
+      g_slist_free (cur->children);
+
+      idx += 1;
+    }
+
+  g_free (rcs->filename);
+  g_free (rcs->headname);
+  g_free (rcs->versions);
+  g_free (rcs->versions_date);
+  g_free (rcs);
+}
+
+/* Process a single RCS file: load its revision log, and unless the
+ * file is skipped (damaged, or outside the walker's min/max version
+ * limits) run the walker's dateorder, delta_orig and delta_date
+ * passes, record the per-file statistics in STATS and call the
+ * onefile hook.
+ *
+ * COPYFILE is stored in the RcsFile but stays owned by the caller.
+ * Returns 0 on success or when the file is skipped, otherwise the
+ * first non-zero result of a step.  The RcsFile is released on every
+ * path, including the error paths.
+ */
+int
+rcswalk_one (char* rcsfile, char* copyfile, RcsWalker* walker, RcsStats* stats, void* data)
+{
+  RcsFile* rcs;
+  int i, ret;
+  long long maxsize = 0;
+  gboolean skip = FALSE;
+
+  rcs = g_new0 (RcsFile, 1);
+
+  rcs->filename = g_strdup (rcsfile);
+  rcs->copyname = copyfile;
+
+  if ((ret = rcswalk_load (rcs, & skip)))
+    goto done;
+
+  if (walker->min_versions > rcs->version_count)
+    {
+      small_count += 1;
+      skip = TRUE;
+    }
+
+  if (walker->max_versions < rcs->version_count)
+    {
+      large_count += 1;
+      skip = TRUE;
+    }
+
+  if (! skip)
+    {
+      process_count += 1;
+
+      if (walker->dateorder && (ret = rcswalk_dateorder (rcs, walker, stats, data)))
+        goto done;
+
+      if (walker->delta_orig)
+        {
+          int count = 0;
+
+          if ((ret = rcswalk_delta_orig (rcs, walker, rcs->root_version, & count, data)))
+            goto done;
+
+          /* Every version except the root is some version's child. */
+          g_assert (count == (rcs->version_count - 1));
+        }
+
+      if (walker->delta_date && (ret = rcswalk_delta_date (rcs, walker, data)))
+        goto done;
+
+      for (i = 0; i < rcs->version_count; i += 1)
+        {
+          rcs->total_size += rcs->versions[i].size;
+          maxsize = MAX (rcs->versions[i].size, maxsize);
+        }
+
+      stat_int_add_item (stats->version_stat, rcs->version_count);
+      stat_int_add_item (stats->forward_stat, rcs->forward_count);
+      stat_int_add_item (stats->reverse_stat, rcs->reverse_count);
+      stat_int_add_item (stats->branch_stat, rcs->branch_count);
+      stat_int_add_item (stats->unencoded_stat, rcs->total_size);
+      stat_int_add_item (stats->literal_stat, maxsize);
+
+      if (walker->onefile && (ret = walker->onefile (rcs, stats, data)))
+        goto done;
+    }
+
+  ret = 0;
+
+ done:
+
+  /* Single exit, so the RcsFile is not leaked when a step fails. */
+  rcswalk_free (rcs);
+
+  return ret;
+}
+
+/* Recursively walk directory DIR.  Every entry whose name ends in
+ * ",v" is processed with rcswalk_one(); every other entry that is a
+ * directory is descended into.  When COPY_DIR is not NULL it is
+ * created first and the matching path below it is passed along for
+ * each entry.
+ *
+ * Returns 0 on success.  Returns the result of config_create_dir()
+ * or an errno value when the directory itself cannot be set up,
+ * opened or closed, and -1 when processing an entry fails.
+ */
+int
+rcswalk_dir (const char* dir, RcsWalker* walker, RcsStats* stats, void* data, const char* copy_dir)
+{
+  int ret;
+  gboolean failed = FALSE;
+  DIR* thisdir;
+  struct dirent* ent;
+
+  if (copy_dir && (ret = config_create_dir (copy_dir))) {
+    return ret;
+  }
+
+  if (! (thisdir = opendir (dir)))
+    {
+      /* Save errno first: the logging call may change it. */
+      int err = errno;
+
+      g_warning ("opendir failed: %s", dir);
+      return err ? err : -1;
+    }
+
+  while (! failed && (ent = readdir (thisdir)))
+    {
+      char* name = ent->d_name;
+      int len;
+      struct stat buf;
+      char* fullname;
+      char* copyname = NULL;
+
+      if (strcmp (name, ".") == 0)
+        continue;
+
+      if (strcmp (name, "..") == 0)
+        continue;
+
+      len = strlen (name);
+
+      fullname = g_strdup_printf ("%s/%s", dir, name);
+
+      if (copy_dir)
+        copyname = g_strdup_printf ("%s/%s", copy_dir, name);
+
+      if (len > 2 && strcmp (name + len - 2, ",v") == 0)
+        {
+          if (rcswalk_one (fullname, copyname, walker, stats, data))
+            failed = TRUE;
+        }
+      else if (stat (fullname, & buf) < 0)
+        {
+          g_warning ("stat failed: %s\n", fullname);
+          failed = TRUE;
+        }
+      else if (S_ISDIR (buf.st_mode))
+        {
+          if (rcswalk_dir (fullname, walker, stats, data, copyname))
+            failed = TRUE;
+        }
+
+      /* Released on the failure path too; the loop condition then
+       * ends the walk.
+       */
+      g_free (fullname);
+      g_free (copyname);
+    }
+
+  if (failed)
+    {
+      closedir (thisdir);
+      return -1;
+    }
+
+  if (closedir (thisdir) < 0)
+    {
+      int err = errno;
+
+      g_warning ("closedir failed: %s", dir);
+      return err ? err : -1;
+    }
+
+  return 0;
+}
+
+/* Register the rcswalk option table (experiment name and RCS input
+ * directory) with the configuration code.
+ */
+void
+rcswalk_init (void)
+{
+  config_register (rcswalk_options, ARRAY_SIZE (rcswalk_options));
+}
+
+/* Entry point of a walk: reset the counters, create the statistics
+ * objects, choose the two temporary file names, call the walker's
+ * initialize hook, walk the configured input directory and finally
+ * call the finalize hook.  COPY_BASE, when not NULL, is passed to the
+ * directory walk as the copy directory.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the
+ * directory walk or of the finalize hook.  The temporary files are
+ * removed in either case.
+ */
+int
+rcswalk (RcsWalker *walker, const char* copy_base)
+{
+  void* data = NULL;
+  RcsStats stats;
+  int ret;
+
+  skip_count = 0;
+  small_count = 0;
+  process_count = 0;
+  large_count = 0;
+
+  memset (& stats, 0, sizeof (stats));
+
+  stats.avg_version_size = stat_bincount_new ("AvgVersionSize"); /* @@@ leak */
+  stats.version_stat = stat_int_new ("Version"); /* @@@ leak */
+  stats.forward_stat = stat_int_new ("Forward"); /* @@@ leak */
+  stats.reverse_stat = stat_int_new ("Reverse"); /* @@@ leak */
+  stats.branch_stat  = stat_int_new ("Branch"); /* @@@ leak */
+  stats.unencoded_stat = stat_int_new ("Unencoded"); /* @@@ leak */
+  stats.literal_stat   = stat_int_new ("Literal"); /* @@@ leak */
+
+  tmp_file_1 = g_strdup_printf ("%s/rcs1.%d", g_get_tmp_dir (), (int) getpid ());
+  tmp_file_2 = g_strdup_printf ("%s/rcs2.%d", g_get_tmp_dir (), (int) getpid ());
+
+  if (walker->initialize)
+    data = walker->initialize ();
+
+  ret = rcswalk_dir (rcswalk_input_dir, walker, & stats, data, copy_base);
+
+  if (ret == 0 && walker->finalize)
+    ret = walker->finalize (& stats, data);
+
+  /* Clean up before looking at the result, so that a failed walk
+   * does not leave the scratch files behind.
+   */
+  unlink (tmp_file_1);
+  unlink (tmp_file_2);
+
+  if (ret)
+    return ret;
+
+  fprintf (stderr, "rcswalk: processed %d files: too small %d; too large: %d; damaged: %d\n", process_count, small_count, large_count, skip_count);
+
+  return 0;
+}
+
+/* Statistics
+ */
+
+/* Write the report for every statistic in SET: the version-size bins
+ * first, then the integer statistics in declaration order.
+ */
+void
+rcswalk_report (RcsStats* set)
+{
+  IntStat *counters[6];
+  int k;
+
+  counters[0] = set->version_stat;
+  counters[1] = set->forward_stat;
+  counters[2] = set->reverse_stat;
+  counters[3] = set->branch_stat;
+  counters[4] = set->unencoded_stat;
+  counters[5] = set->literal_stat;
+
+  stat_bincount_report (set->avg_version_size);
+
+  for (k = 0; k < 6; k += 1)
+    stat_int_report (counters[k]);
+}
+
+/* Int stat
+ */
+/* Allocate an empty integer statistic labelled NAME.  NAME is stored
+ * by pointer, not copied.
+ */
+IntStat*
+stat_int_new (const char* name)
+{
+  IntStat* fresh = g_new0 (IntStat, 1);
+
+  fresh->values = g_array_new (FALSE, FALSE, sizeof (long long));
+  fresh->name = name;
+
+  return fresh;
+}
+
+/* Add sample V to STAT: update count, sum, minimum and maximum and
+ * append V to the stored values.
+ */
+void
+stat_int_add_item (IntStat* stat, long long v)
+{
+  if (stat->count == 0)
+    {
+      /* Seed both extremes from the first sample.  Leaving max at its
+       * zero-initialised value would report a maximum of 0 for a
+       * series consisting only of negative values.
+       */
+      stat->min = v;
+      stat->max = v;
+    }
+  else
+    {
+      if (v < stat->min)
+        stat->min = v;
+
+      if (v > stat->max)
+        stat->max = v;
+    }
+
+  stat->count += 1;
+  stat->sum += v;
+
+  g_array_append_val (stat->values, v);
+}
+
+/* Population standard deviation of the samples stored in STAT (the
+ * squared deviations are divided by count, not count - 1).
+ */
+double
+stat_int_stddev (IntStat *stat)
+{
+  double n = (double) stat->count;
+  double mean = (double) stat->sum / n;
+  double sq_sum = 0;
+  int k = 0;
+
+  while (k < stat->count)
+    {
+      double delta = mean - (double) g_array_index (stat->values, long long, k);
+
+      sq_sum += delta * delta;
+      k += 1;
+    }
+
+  return sqrt (sq_sum / n);
+}
+
+/* qsort() comparator for long long values in ascending order.
+ *
+ * The operands are compared rather than subtracted: a long long
+ * difference converted to int can lose its sign or collapse to zero
+ * (two values that differ by exactly 2^32 would compare equal).
+ */
+int
+ll_comp (const void* a, const void* b)
+{
+  const long long* lla = a;
+  const long long* llb = b;
+
+  return (*lla > *llb) - (*lla < *llb);
+}
+
+/* Write two cumulative histograms of STAT's samples, one line per
+ * distinct value: "<name>.pop.hist" gives the fraction of samples up
+ * to and including that value, "<name>.sum.hist" the fraction of the
+ * total sum they account for.  Sorts the stored values in place.
+ * Aborts if an output file cannot be opened.
+ */
+void
+stat_int_histogram (IntStat *stat)
+{
+  FILE* p_out = config_output ("%s.pop.hist", stat->name);
+  FILE* s_out;
+  long long cum = 0;
+  int i = 0;
+
+  if (p_out == NULL)
+    abort ();
+
+  s_out = config_output ("%s.sum.hist", stat->name);
+
+  if (s_out == NULL)
+    abort ();
+
+  qsort (stat->values->data, stat->count, sizeof (long long), ll_comp);
+
+  while (i < stat->count)
+    {
+      long long value = g_array_index (stat->values, long long, i);
+      int run = 1;
+
+      /* Measure the run of equal values starting at i. */
+      while ((i + run) < stat->count
+             && g_array_index (stat->values, long long, i + run) == value)
+        run += 1;
+
+      cum += run * value;
+      i += run;
+
+      fprintf (p_out, "%qd, %0.3f\n", value, (double) i / (double) stat->count);
+      fprintf (s_out, "%qd, %0.3f\n", value, (double) cum / (double) stat->sum);
+    }
+
+  if (fclose (p_out) < 0 || fclose (s_out) < 0)
+    {
+      g_error ("fclose failed\n");
+    }
+}
+
+/* Write the summary of STAT (count, extremes, sum, mean, standard
+ * deviation) to "<name>.stat", then emit its histograms.  Aborts if
+ * the output file cannot be opened.
+ */
+void
+stat_int_report (IntStat* stat)
+{
+  FILE* out = config_output ("%s.stat", stat->name);
+  double mean;
+  double dev;
+
+  if (out == NULL)
+    abort ();
+
+  mean = (double) stat->sum / (double) stat->count;
+  dev = stat_int_stddev (stat);
+
+  fprintf (out, "Name: %s\n", stat->name);
+  fprintf (out, "Count: %d\n", stat->count);
+  fprintf (out, "Min: %qd\n", stat->min);
+  fprintf (out, "Max: %qd\n", stat->max);
+  fprintf (out, "Sum: %qd\n", stat->sum);
+  fprintf (out, "Mean: %0.2f\n", mean);
+  fprintf (out, "Stddev: %0.2f\n", dev);
+
+  if (fclose (out) < 0)
+    g_error ("fclose failed");
+
+  stat_int_histogram (stat);
+}
+
+/* Dbl stat
+ */
+
+/* Allocate an empty floating-point statistic labelled NAME.  NAME is
+ * stored by pointer, not copied.
+ */
+DblStat*
+stat_dbl_new (const char* name)
+{
+  DblStat* fresh = g_new0 (DblStat, 1);
+
+  fresh->values = g_array_new (FALSE, FALSE, sizeof (double));
+  fresh->name = name;
+
+  return fresh;
+}
+
+/* Records sample V in STAT: updates count, min, max and sum, and appends
+ * V to the sample array.
+ */
+void
+stat_dbl_add_item (DblStat* stat, double v)
+{
+  if (! stat->count)
+    {
+      /* The first sample seeds both extremes.  Seeding only min would
+       * leave max at its zero-initialised value, so a series of negative
+       * samples would report a maximum of 0. */
+      stat->min = v;
+      stat->max = v;
+    }
+  else
+    {
+      stat->min = MIN (v, stat->min);
+      stat->max = MAX (v, stat->max);
+    }
+
+  stat->count += 1;
+  stat->sum += v;
+
+  g_array_append_val (stat->values, v);
+}
+
+/* Returns the population standard deviation of STAT's samples: the square
+ * root of the mean squared deviation from stat->sum / stat->count.
+ */
+double
+stat_dbl_stddev (DblStat *stat)
+{
+  const double mean = stat->sum / stat->count;
+  double sq_dev = 0;
+  int k;
+
+  for (k = 0; k < stat->count; k += 1)
+    {
+      const double delta = mean - g_array_index (stat->values, double, k);
+
+      sq_dev += delta * delta;
+    }
+
+  return sqrt (sq_dev / stat->count);
+}
+
+/* qsort() comparator for arrays of double: yields 1, -1 or 0 according to
+ * the sign of (*a - *b).
+ */
+int
+dbl_comp (const void* a, const void* b)
+{
+  const double delta = * (const double*) a - * (const double*) b;
+
+  return (delta > 0.0) - (delta < 0.0);
+}
+
+/* Emits "<name>.pop.hist" and "<name>.sum.hist" for STAT via
+ * config_output().  Each distinct sample value produces one line per file:
+ * the value followed by the cumulative fraction of the sample count
+ * (pop) or of stat->sum (sum).  Sorts the sample array in place.
+ */
+void
+stat_dbl_histogram (DblStat *stat)
+{
+  FILE* pop_file;
+  FILE* sum_file;
+  double running = 0.0;
+  int start = 0;
+
+  pop_file = config_output ("%s.pop.hist", stat->name);
+
+  if (pop_file == NULL)
+    abort ();
+
+  sum_file = config_output ("%s.sum.hist", stat->name);
+
+  if (sum_file == NULL)
+    abort ();
+
+  qsort (stat->values->data, stat->count, sizeof (double), dbl_comp);
+
+  while (start < stat->count)
+    {
+      const double value = g_array_index (stat->values, double, start);
+      int run = 1;
+
+      /* Extend the run across adjacent entries equal to value. */
+      while ((start + run) < stat->count &&
+             g_array_index (stat->values, double, start + run) == value)
+        {
+          run += 1;
+        }
+
+      running += ((double) run) * value;
+
+      fprintf (pop_file, "%0.6f, %0.3f\n", value, (double) (start + run) / (double) stat->count);
+      fprintf (sum_file, "%0.6f, %0.3f\n", value, running / stat->sum);
+
+      start += run;
+    }
+
+  if (fclose (pop_file) < 0 || fclose (sum_file) < 0)
+    {
+      g_error ("fclose failed\n");
+    }
+}
+
+/* Writes STAT's summary (name, count, min, max, sum, mean, standard
+ * deviation) to "<name>.stat" via config_output(), then produces the
+ * histogram files with stat_dbl_histogram().
+ */
+void
+stat_dbl_report (DblStat* stat)
+{
+  FILE* report = config_output ("%s.stat", stat->name);
+
+  if (report == NULL)
+    abort ();
+
+  fprintf (report, "Name:   %s\n", stat->name);
+  fprintf (report, "Count:  %d\n", stat->count);
+  fprintf (report, "Min:    %0.6f\n", stat->min);
+  fprintf (report, "Max:    %0.6f\n", stat->max);
+  fprintf (report, "Sum:    %0.6f\n", stat->sum);
+  fprintf (report, "Mean:   %0.6f\n", stat->sum / stat->count);
+  fprintf (report, "Stddev: %0.6f\n", stat_dbl_stddev (stat));
+
+  if (fclose (report) < 0)
+    {
+      g_error ("fclose failed");
+    }
+
+  stat_dbl_histogram (stat);
+}
+
+/* Bincount
+ */
+/* Allocates a zero-initialised BinCounter labelled NAME with an empty
+ * pointer array of bins.  NAME is stored by pointer, not copied.
+ */
+BinCounter*
+stat_bincount_new (const char* name)
+{
+  BinCounter* counter = g_new0 (BinCounter, 1);
+
+  counter->bins = g_ptr_array_new ();
+  counter->name = name;
+
+  return counter;
+}
+
+/* Appends VAL to bin number BIN of BC.  The bin table is grown to hold
+ * BIN if necessary, and the bin's sample array is created on first use.
+ */
+void
+stat_bincount_add_item (BinCounter* bc, int bin, double val)
+{
+  GArray* samples;
+
+  if (bin >= bc->bins->len)
+    g_ptr_array_set_size (bc->bins, bin+1);
+
+  samples = bc->bins->pdata[bin];
+
+  if (samples == NULL)
+    {
+      samples = g_array_new (FALSE, TRUE, sizeof (double));
+      bc->bins->pdata[bin] = samples;
+    }
+
+  g_assert (samples);
+
+  g_array_append_val (samples, val);
+}
+
+/* Writes two files for BC via config_output(), one line per bin:
+ *
+ *   "<name>.raw": every sample in the bin, space separated
+ *   "<name>.avg": the bin's mean followed by its sample count
+ *
+ * A bin that never received a sample is reported like an empty bin, so
+ * the line number always corresponds to the bin number.
+ */
+void
+stat_bincount_report (BinCounter* bc)
+{
+  FILE *avg_out;
+  FILE *raw_out;
+  int i;
+
+  if (! (avg_out = config_output ("%s.avg", bc->name)))
+    abort ();
+
+  if (! (raw_out = config_output ("%s.raw", bc->name)))
+    abort ();
+
+  for (i = 0; i < bc->bins->len; i += 1)
+    {
+      GArray* one = bc->bins->pdata[i];
+
+      /* stat_bincount_add_item() creates a bin's array lazily, so slots
+       * between populated bins are still NULL here; they must not be
+       * dereferenced. */
+      int count = one ? (int) one->len : 0;
+      double sum = 0.0;
+      int j;
+
+      for (j = 0; j < count; j += 1)
+        {
+          double d = g_array_index (one, double, j);
+
+          sum += d;
+
+          fprintf (raw_out, "%e ", d);
+        }
+
+      fprintf (raw_out, "\n");
+      fprintf (avg_out, "%e %d\n", sum / count, count);
+    }
+
+  if (fclose (avg_out) < 0)
+    g_error ("fclose failed");
+
+  if (fclose (raw_out) < 0)
+    g_error ("fclose failed");
+}
+
+/* Config stuff
+ */
+
+/* Ensures DIRNAME exists as a directory, creating it (mode 0777, subject
+ * to the umask) when stat() cannot find it.
+ *
+ * Returns 0 on success, the mkdir() errno if creation fails, or ENOTDIR
+ * if DIRNAME exists but is not a directory.
+ */
+int
+config_create_dir (const char* dirname)
+{
+  struct stat buf;
+
+  if (stat (dirname, & buf) < 0)
+    {
+      if (mkdir (dirname, 0777) < 0)
+        {
+          /* Capture errno first: fprintf may overwrite it. */
+          int err = errno;
+
+          fprintf (stderr, "mkdir failed: %s\n", dirname);
+          return err;
+        }
+    }
+  else if (! S_ISDIR (buf.st_mode))
+    {
+      /* stat() succeeded, so errno carries no information about this
+       * failure (it may well be 0, which callers read as success). */
+      fprintf (stderr, "not a directory: %s\n", dirname);
+      return ENOTDIR;
+    }
+
+  return 0;
+}
+
+/* Recursively removes DIR by running "rm -rf DIR" through system().
+ * A NULL DIR is a no-op.  Removal is best effort: a failing command is
+ * reported on stderr but the function still returns 0.
+ *
+ * NOTE(review): DIR is interpolated into a shell command unquoted, so
+ * whitespace or shell metacharacters in it are interpreted by the shell.
+ * The path is derived from config-file values; quote it or remove the
+ * tree without a shell if those files are not trusted.
+ */
+int
+config_clear_dir (const char* dir)
+{
+  if (dir)
+    {
+      /* Heap-allocate the command: a fixed 1024-byte buffer overflows
+       * for long directory names. */
+      gchar *cmd = g_strdup_printf ("rm -rf %s", dir);
+
+      if (system (cmd) != 0)
+        fprintf (stderr, "rm -rf failed: %s\n", dir);
+
+      g_free (cmd);
+    }
+
+  return 0;
+}
+
+static ConfigOption all_options[64];
+static int          option_count;
+
+/* Registers the built-in config_options table the first time it is
+ * called; later calls return immediately.
+ */
+void
+config_init ()
+{
+  static gboolean initialized = FALSE;
+
+  if (initialized)
+    return;
+
+  /* Set the flag before registering: config_register() calls back into
+   * config_init(). */
+  initialized = TRUE;
+
+  config_register (config_options, ARRAY_SIZE (config_options));
+}
+
+/* Copies NOPTS option descriptors from OPTS into the global option table,
+ * after making sure the built-in options are registered.  Aborts through
+ * g_error() instead of writing past the end of the fixed-size table.
+ */
+void
+config_register (ConfigOption *opts, int nopts)
+{
+  const int capacity = (int) (sizeof (all_options) / sizeof (all_options[0]));
+  int i;
+
+  config_init ();
+
+  for (i = 0; i < nopts; i += 1)
+    {
+      if (option_count >= capacity)
+        g_error ("too many config options (limit %d)\n", capacity);
+
+      all_options[option_count++] = opts[i];
+    }
+}
+
+/* Points the string option named VAR at VAL and marks it as found.  Only
+ * the first option with a matching name is updated; an unknown name is
+ * silently ignored.  VAL is stored by pointer, not copied.
+ */
+void
+config_set_string (const char* var, const char* val)
+{
+  ConfigOption *cur = all_options;
+  ConfigOption *end = all_options + option_count;
+
+  for (; cur < end; cur += 1)
+    {
+      if (strcmp (cur->name, var) != 0)
+        continue;
+
+      (* (const char**) cur->value) = val;
+      cur->found = TRUE;
+      return;
+    }
+}
+
+/* Reads CONFIG_FILE as a whitespace-separated token stream of the form
+ * "name value name value ...", storing each value through the matching
+ * registered option's value pointer and marking the option as found.
+ *
+ * Options registered with CO_None take no value token and are simply set
+ * to TRUE.  Unrecognised names are skipped together with the token that
+ * follows them.
+ *
+ * Returns 0 on success, the fopen() errno if the file cannot be opened,
+ * or -1 on a missing or unparsable value.
+ */
+int
+config_parse (const char* config_file)
+{
+  FILE *in;
+  /* The "%1023s" conversions below must stay in step with these sizes. */
+  char oname[1024], value[1024];
+  int i;
+
+  if (! (in = fopen (config_file, "r")))
+    {
+      /* Capture errno first: fprintf may overwrite it. */
+      int err = errno;
+
+      fprintf (stderr, "fopen failed: %s\n", config_file);
+      return err;
+    }
+
+  for (;;)
+    {
+      ConfigOption *opt = NULL;
+
+      /* Bounded read: a bare %s overruns oname on an over-long token. */
+      if (fscanf (in, "%1023s", oname) != 1)
+        break;
+
+      for (i = 0; i < option_count; i += 1)
+        {
+          if (strcmp (oname, all_options[i].name) == 0)
+            {
+              opt = all_options + i;
+              break;
+            }
+        }
+
+      if (opt && opt->arg == CO_None)
+        {
+          (* (gboolean*) opt->value) = TRUE;
+          opt->found = TRUE;
+          continue;
+        }
+
+      if (fscanf (in, "%1023s", value) != 1)
+        {
+          fprintf (stderr, "no value for option: %s; file: %s\n", oname, config_file);
+          goto abort;
+        }
+
+      if (! opt)
+        {
+          /*fprintf (stderr, "unrecognized option: %s\n", oname);*/
+          continue;
+        }
+
+      switch (opt->type)
+        {
+        case CD_Bool:
+
+          /* Store through the pointer.  The previous code assigned to the
+           * cast pointer itself, so the flag was never written. */
+          if (strcasecmp (value, "yes") == 0 ||
+              strcasecmp (value, "true") == 0 ||
+              strcmp     (value, "1") == 0 ||
+              strcasecmp (value, "on") == 0)
+            {
+              (* (gboolean*) opt->value) = TRUE;
+            }
+          else
+            {
+              (* (gboolean*) opt->value) = FALSE;
+            }
+
+          break;
+        case CD_Int32:
+
+          if (sscanf (value, "%d", (gint32*) opt->value) != 1)
+            {
+              fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file);
+              goto abort;
+            }
+
+          break;
+        case CD_Double:
+
+          if (sscanf (value, "%lf", (double*) opt->value) != 1)
+            {
+              fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file);
+              goto abort;
+            }
+
+          break;
+        case CD_String:
+
+          /* value is a stack buffer reused on every iteration, so the
+           * option must keep its own copy. */
+          (* (const char**) opt->value) = g_strdup (value);
+
+          break;
+        }
+
+      opt->found = TRUE;
+    }
+
+  fclose (in);
+
+  return 0;
+
+ abort:
+
+  fclose (in);
+
+  return -1;
+}
+
+/* Builds config_output_dir as "<config_output_base>/<tag>", where <tag> is
+ * a comma-separated list of "abbrev=value" pairs, one for every option
+ * that was found and whose style is not CS_Ignore.  String options with
+ * style CS_UseAsFile contribute only the part after the last '/'.
+ *
+ * The tag is assembled in a growable GString, so long option values
+ * cannot overrun a fixed-size buffer.  Always returns 0.
+ */
+int
+config_compute_output_dir ()
+{
+  GString *tag = g_string_new ("");
+  gchar *piece;
+  int i;
+  gboolean last = FALSE;
+
+  for (i = 0; i < option_count; i += 1)
+    {
+      ConfigOption *opt = all_options + i;
+
+      if (opt->style == CS_Ignore)
+        continue;
+
+      if (! opt->found)
+        continue;
+
+      /* Separator goes before every pair except the first. */
+      if (last)
+        g_string_append_c (tag, ',');
+
+      last = TRUE;
+
+      g_string_append (tag, opt->abbrev);
+      g_string_append_c (tag, '=');
+
+      switch (opt->type)
+        {
+        case CD_Bool:
+
+          g_string_append (tag, (* (gboolean*) opt->value) ? "true" : "false");
+
+          break;
+        case CD_Int32:
+
+          piece = g_strdup_printf ("%d", (* (gint32*) opt->value));
+          g_string_append (tag, piece);
+          g_free (piece);
+
+          break;
+        case CD_Double:
+
+          piece = g_strdup_printf ("%0.2f", (* (double*) opt->value));
+          g_string_append (tag, piece);
+          g_free (piece);
+
+          break;
+        case CD_String:
+
+          if (opt->style == CS_UseAsFile)
+            {
+              const char* str = (* (const char**) opt->value);
+              const char* ls = strrchr (str, '/');
+
+              g_string_append (tag, ls ? (ls + 1) : str);
+            }
+          else
+            {
+              g_string_append (tag, (* (const char**) opt->value));
+            }
+
+          break;
+        }
+    }
+
+  config_output_dir = g_strdup_printf ("%s/%s", config_output_base, tag->str);
+
+  g_string_free (tag, TRUE);
+
+  return 0;
+}
+
+/* Finishes configuration: checks that every CO_Required option was found,
+ * computes the output directory, clears and recreates it, and writes the
+ * value of every registered option to the "Options" file inside it.
+ *
+ * Returns 0 on success, -1 if a required option is missing, or the
+ * non-zero status of the first failing step.
+ */
+int
+config_done (void)
+{
+  int idx, status;
+  FILE *summary;
+
+  for (idx = 0; idx < option_count; idx += 1)
+    {
+      if (all_options[idx].arg == CO_Required && ! all_options[idx].found)
+        {
+          fprintf (stderr, "required option not found: %s\n", all_options[idx].name);
+          return -1;
+        }
+    }
+
+  status = config_compute_output_dir ();
+
+  if (status)
+    return status;
+
+  status = config_clear_dir (config_output_dir);
+
+  if (status)
+    return status;
+
+  status = config_create_dir (config_output_dir);
+
+  if (status)
+    return status;
+
+  summary = config_output ("Options");
+
+  if (summary == NULL)
+    abort ();
+
+  for (idx = 0; idx < option_count; idx += 1)
+    {
+      ConfigOption *cur = & all_options[idx];
+
+      fprintf (summary, "option: %s; value: ", cur->name);
+
+      switch (cur->type)
+        {
+        case CD_Bool:
+          fprintf (summary, "%s", (* (gboolean*) cur->value) ? "TRUE" : "FALSE");
+          break;
+
+        case CD_Int32:
+          fprintf (summary, "%d", (* (gint32*) cur->value));
+          break;
+
+        case CD_Double:
+          fprintf (summary, "%0.2f", (* (double*) cur->value));
+          break;
+
+        case CD_String:
+          fprintf (summary, "%s", (* (const char**) cur->value));
+          break;
+        }
+
+      fprintf (summary, "\n");
+    }
+
+  if (fclose (summary))
+    {
+      fprintf (stderr, "fclose failed\n");
+      return errno;
+    }
+
+  return 0;
+}
+
+/* Returns a short label for OPT's argument requirement: "required",
+ * "optional", "no value", or "unknown" for any other setting.
+ */
+const char*
+config_help_arg (ConfigOption *opt)
+{
+  if (opt->arg == CO_Required)
+    return "required";
+
+  if (opt->arg == CO_Optional)
+    return "optional";
+
+  if (opt->arg == CO_None)
+    return "no value";
+
+  return "unknown";
+}
+
+/* Returns a short label for OPT's value type.  Options that take no
+ * argument (CO_None) are always described as "boolean"; otherwise the
+ * label follows opt->type, with "unknown" as the fallback.
+ */
+const char*
+config_help_type (ConfigOption *opt)
+{
+  const char *label = "unknown";
+
+  if (opt->arg == CO_None)
+    return "boolean";
+
+  switch (opt->type)
+    {
+    case CD_Bool:
+      label = "boolean";
+      break;
+    case CD_Int32:
+      label = "int";
+      break;
+    case CD_Double:
+      label = "double";
+      break;
+    case CD_String:
+      label = "string";
+      break;
+    }
+
+  return label;
+}
+
+/* Prints to stderr one line per registered option: its name, whether an
+ * argument is required, and the argument's type.
+ */
+void
+config_help (void)
+{
+  ConfigOption *cur;
+  ConfigOption *end = all_options + option_count;
+
+  fprintf (stderr, "Expecting the following options in one or more config files on the command line:\n");
+
+  for (cur = all_options; cur < end; cur += 1)
+    {
+      const char *arg_label  = config_help_arg (cur);
+      const char *type_label = config_help_type (cur);
+
+      fprintf (stderr, "%s: %s %s\n", cur->name, arg_label, type_label);
+    }
+}
+
+/* Formats a file name from FORMAT and the variadic arguments, then opens
+ * "<config_output_dir>/<name>" for writing.  A failed fopen() is reported
+ * through g_error() with the formatted name.  Returns the stream.
+ */
+FILE*
+config_output (const char* format, ...)
+{
+  va_list ap;
+  gchar *leaf;
+  gchar *path;
+  FILE *stream;
+
+  va_start (ap, format);
+  leaf = g_strdup_vprintf (format, ap);
+  va_end (ap);
+
+  path = g_strdup_printf ("%s/%s", config_output_dir, leaf);
+
+  stream = fopen (path, "w");
+
+  if (stream == NULL)
+    g_error ("fopen failed: %s\n", leaf);
+
+  g_free (path);
+  g_free (leaf);
+
+  return stream;
+}
+
+
+#include <edsio.h>
+#include <edsiostdio.h>
+#include <ctype.h>
+#include "xdfs.h"
+
+/* Warning: very cheesy!
+ */
+
+#ifdef DEBUG_EXTRACT
+  FileHandle *fh2 = handle_read_file (filename);
+
+  guint8* debug_buf = g_malloc (buflen);
+
+  if (! handle_read (fh2, debug_buf, buflen))
+    g_error ("read failed");
+#endif
+
+/* Scans the RCS file FILENAME line by line and accumulates in
+ * *ENCODED_SIZE an approximate byte count of the '@'-delimited strings
+ * that follow a line beginning with "text".  Doubled "@@" escapes are
+ * only compensated for by bumping the start offset, so the result is an
+ * estimate.
+ *
+ * Returns TRUE once the file has been scanned, FALSE if it cannot be
+ * opened.  *ENCODED_SIZE is reset to 0 in both cases.
+ */
+gboolean
+rcs_count (const char* filename, guint *encoded_size)
+{
+  char *readbuf0, *readbuf;
+  gboolean in_string = FALSE;
+  gboolean in_text = FALSE;
+  guint string_start = 0;
+  guint string_end = 0;
+  guint current_pos = 0;
+  /*char *current_delta = NULL;*/
+  FileHandle *fh;
+  guint buflen;
+
+  (* encoded_size) = 0;
+
+  /* NOTE(review): assumes handle_read_file() reports failure by
+   * returning NULL -- confirm against edsio.  Without this check an
+   * unreadable file is passed straight to handle_length(). */
+  if (! (fh = handle_read_file (filename)))
+    return FALSE;
+
+  buflen = handle_length (fh);
+
+  /* Allocate with the element type the pointer actually has. */
+  readbuf0 = g_new (char, buflen);
+
+  for (;;)
+    {
+      int c = handle_gets (fh, readbuf0, buflen);
+
+      readbuf = readbuf0;
+
+      if (c < 0)
+        break;
+
+      if (strncmp (readbuf, "text", 4) == 0)
+        in_text = TRUE;
+
+      /* An '@' at the start of a line opens a string. */
+      if (! in_string && readbuf[0] == '@')
+        {
+          string_start = current_pos + 1;
+          in_string = TRUE;
+          readbuf += 1;
+        }
+
+      current_pos += c;
+
+      if (in_string)
+        {
+          /* Look for the closing '@', stepping over "@@" escapes. */
+          while ((readbuf = strchr (readbuf, '@')))
+            {
+              if (readbuf[1] == '@')
+                {
+                  string_start += 1; /* @@@ bogus, just counting. */
+                  readbuf += 2;
+                  continue;
+                }
+
+              in_string = FALSE;
+              break;
+            }
+
+          string_end = current_pos - 2;
+
+          if (in_text && ! in_string)
+            {
+              in_text = FALSE;
+
+              /*g_free (current_delta);
+                current_delta = NULL;*/
+
+              (* encoded_size) += (string_end - string_start);
+            }
+
+          continue;
+        }
+
+      if (isdigit (readbuf[0]))
+        {
+#if 0
+          (* strchr (readbuf, '\n')) = 0;
+          if (current_delta)
+            g_free (current_delta);
+          current_delta = g_strdup (readbuf);
+#endif
+        }
+    }
+
+  handle_close (fh);
+
+  g_free (readbuf0);
+
+#if 0
+  if (current_delta)
+    g_free (current_delta);
+#endif
+
+  return TRUE;
+}
+
+#if 0
+/* Stand-alone driver for rcs_count(), compiled out by the enclosing
+ * #if 0.  Expects exactly one argument, the RCS file to scan.  The
+ * computed size is not printed.
+ */
+int
+main (int argc, char** argv)
+{
+  guint size;
+
+  if (argc != 2)
+    g_error ("usage: %s RCS_file\n", argv[0]);
+
+  if (! rcs_count (argv[1], &size))
+    g_error ("rcs_parse failed");
+
+  return 0;
+}
+#endif
diff --git a/xdelta3/setup.py b/xdelta3/setup.py
new file mode 100755
index 0000000..9d717e8
--- /dev/null
+++ b/xdelta3/setup.py
@@ -0,0 +1,33 @@
+# xdelta 3 - delta compression tools and library
+# Copyright (C) 2004 and onward.  Joshua P. MacDonald
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+#
+from distutils.core import setup, Extension
+
+xdelta3_ext = Extension('xdelta3',
+                        ['xdelta3.c'],
+                        define_macros = [('PYTHON_MODULE',1),
+                                         ('XD3_POSIX',1),
+                                         ('REGRESSION_TEST',1),
+                                         ('XD3_DEBUG',1),
+                                         ('EXTCOMP',1),
+                                         ('VCDIFF_TOOLS',1),
+                                         ('XD3_USE_LARGEFILE64',1)])
+
+setup(name='xdelta3',
+      version='pre',
+      ext_modules=[xdelta3_ext])
diff --git a/xdelta3/show.c b/xdelta3/show.c
new file mode 100755
index 0000000..f53f2ca
--- /dev/null
+++ b/xdelta3/show.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#define BUFSZ (1 << 22)
+
+/* usage: show offset bytes
+ *
+ * Skips OFFSET bytes of standard input, then prints the next BYTES bytes
+ * as two-digit hex on stderr, followed by a newline.
+ *
+ * Returns 0 on success, 1 on a usage error or if standard input ends
+ * before OFFSET + BYTES bytes have been read.
+ */
+int main(int argc, char **argv)
+{
+  int c;
+  int offset;
+  int bytes;
+
+  if (argc != 3)
+    {
+      fprintf (stderr, "usage: show offset bytes\n");
+      return 1;
+    }
+
+  offset = atoi (argv[1]);
+  bytes  = atoi (argv[2]);
+
+  /* Negative counts would never reach the loops' end conditions in any
+   * reasonable time; reject them up front. */
+  if (offset < 0 || bytes < 0)
+    {
+      fprintf (stderr, "usage: show offset bytes\n");
+      return 1;
+    }
+
+  for (; offset > 0; offset -= 1)
+    {
+      if (fgetc (stdin) == EOF)
+        {
+          /* Stop here: once EOF is hit every further read fails too. */
+          fprintf (stderr, "EOF before offset\n");
+          return 1;
+        }
+    }
+
+  for (; bytes > 0; bytes -= 1)
+    {
+      if ((c = fgetc (stdin)) == EOF)
+        {
+          /* Stop instead of printing EOF itself as "ffffffff". */
+          fprintf (stderr, "\nEOF before offset + bytes\n");
+          return 1;
+        }
+
+      fprintf (stderr, "%02x", c);
+    }
+
+  fprintf (stderr, "\n");
+  return 0;
+}
diff --git a/xdelta3/testh.c b/xdelta3/testh.c
new file mode 100755
index 0000000..1be01df
--- /dev/null
+++ b/xdelta3/testh.c
@@ -0,0 +1 @@
+#include "xdelta3.h"
diff --git a/xdelta3/www/xdelta3-api-guide.html b/xdelta3/www/xdelta3-api-guide.html
new file mode 100755
index 0000000..b3513ea
--- /dev/null
+++ b/xdelta3/www/xdelta3-api-guide.html
@@ -0,0 +1,212 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+  <title>Xdelta3 API guide (BETA)</title>
+  <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
+  <link rel="stylesheet" type="text/css" href="xdelta3.css"/>
+</head>
+<body>
+
+<!-- $Format: "$WWWLeftNavBar$" $ -->
+<table cellpadding="20px" width=700> <tr> <td class="leftbdr" valign=top height=600 width=100> <div class="leftbody"> <h1>Xdelta</h1> <a href="xdelta3.html">overview</a><br> <a href="xdelta3-cmdline.html">command&nbsp;line</a><br> <a href="xdelta3-api-guide.html">api&nbsp;guide</a><br> <br><a href="http://xdelta.org">xdelta.org</a></h2> </div> </td> <td valign=top width=500>
+
+
+<!-- Copyright (C) 2003 and onward. Joshua P. MacDonald -->
+
+<h1>api guide</h1>
+
+<p>This guide intends to give you a quick start to the Xdelta3 programming
+interface.  This is not a complete reference, the comments inside source file
+<code>xdelta3.h</code> and the command-line application,
+<code>xdelta3-main.h</code> offer more complete information.</p>
+
+<p>Have you read the <a href="xdelta3-cmdline.html">command-line interface</a>?</p>
+
+<h1>stream interface</h1>
+
+<p>
+To begin with, there are three external structures, only two of which are
+discussed here.  The <code>xd3_stream</code> struct plays the main role, one
+of these contains the state necessary to encode or decode one stream of data.
+An <code>xd3_source</code> struct maintains state about the (optional) source
+file, against which differences are computed.  The third structure,
+<code>xd3_config</code> deals with configuring various encoder parameters.</p>
+
+<p>
+At a glance, the interface resembles Zlib.  The program puts data in, which
+the xd3_stream consumes.  After computing over the data, the xd3_stream in
+turn generates output for the application to consume, or it requests more
+input.  The xd3_stream also issues requests to the application to read a block
+of source data.  The request to read a source block may be handled in one of
+two ways, according to application preference.  If a <code>xd3_getblk</code>
+callback function is provided, the application handler will be called from
+within the library, suspending computation until the request completes.  If no
+callback function is provided the library returns a special code
+(XD3_GETSRCBLK), allowing the application to issue the request and resume
+computation whenever it likes.  In both cases, the xd3_source struct contains
+the requested block number and a place to store the result.</p>
+
+<h1>setup</h1>
+<p>The code to declare and initialize the xd3_stream:</p>
+<div class="example">
+<pre>
+int ret;
+xd3_stream stream;
+xd3_config config;
+
+xd3_init_config (&config, 0 /* flags */);
+config.winsize = 32768;
+ret = xd3_config_stream (&stream, &config);
+
+if (ret != 0) { /* error */ }
+</pre>
+</div>
+
+<p>
+<code>xd3_init_config()</code> initializes the <code>xd3_config</code> struct
+with default values.  Many settings remain undocumented in the beta release.
+The most relevant setting, <code>xd3_config.winsize</code>, sets the encoder
+window size.  The encoder allocates a buffer of this size if the program
+supplies input in smaller units (unless the <code>XD3_FLUSH</code> flag is
+set). <code>xd3_config_stream()</code> initializes the <code>xd3_stream</code>
+object with the supplied configuration.
+</p>
+
+<h1>setting the source</h1>
+<p>
+The stream is ready for input at this point, though for encoding the source
+data must be supplied now.  To declare and initialize the xd3_source:</p>
+
+<div class="example">
+<pre>
+xd3_source source;
+void *IO_handle = ...;
+
+source.name = "...";
+source.size = file_size;
+source.ioh= IO_handle;
+source.blksize= 32768;
+source.curblkno = (xoff_t) -1;
+source.curblk = NULL;
+
+ret = xd3_set_source (&stream, &source);
+
+if (ret != 0) { /* error */ }
+</pre>
+</div>
+
+<p>
+The decoder sets source data in the same manner, but it may delay this step
+until the application header has been received (<code>XD3_GOTHEADER</code>).
+The application can also check whether source data is required for decoding
+with the <code>xd3_decoder_needs_source()</code>.</p>
+
+<p>
+<code>xd3_source.blksize</code> determines the block size used for requesting
+source blocks.  If the first source block (or the entire source) is already in
+memory, set <code>curblkno</code> to 0 and <code>curblk</code> to that block
+of data.</p>
+
+<h1>input/output loop</h1>
+
+<p>The stream is now ready for input, which the application provides by
+calling <code>xd3_avail_input()</code>.  The application initiates
+encoding or decoding at this point by calling one of two functions:</p>
+
+<div class="example">
+<pre>
+int xd3_encode_input (xd3_stream *stream)
+int xd3_decode_input (xd3_stream *stream)
+</pre>
+</div>
+
+<p>Unless there is an error, these routines return one of six result
+codes which the application must handle.  In many cases, all or most
+of the handler code is shared between encoding and decoding.  The
+codes are:</p>
+
+<ul>
+<li> <code>XD3_INPUT</code>: The stream is ready for (or requires) more input.  The
+application should call xd3_avail_input when (if) more data is
+available.
+
+<li> <code>XD3_OUTPUT</code>: The stream has pending output.  The application
+should write or otherwise consume the block of data found in the
+xd3_stream fields <code>next_out</code> and <code>avail_out</code>,
+then call <code>xd3_consume_output</code>.
+
+<li> <code>XD3_GETSRCBLK</code>: The stream is requesting a source block be read,
+as described above.  This is only ever returned if the xd3_getblk
+callback was not provided.
+
+<li> <code>XD3_GOTHEADER</code>: This decoder-specific code indicates that the
+first VCDIFF window header has been received.  This gives the
+application a chance to inspect the application header before
+decoding the first window.
+
+<li> <code>XD3_WINSTART</code>: This is returned by both encoder and decoder prior to
+processing a window.  For encoding, this code is returned once there is enough
+available input.  For decoding, this is returned following each window header
+(except the first, when XD3_GOTHEADER is returned instead).
+
+<li> <code>XD3_WINFINISH</code>: This is returned when the output from a single
+window has been fully consumed.
+</ul>
+
+<p>An application could be structured something like this:</p>
+
+<div class="example">
+<pre>
+do {
+  read (&indata, &insize);
+  if (reached_EOF) {
+    xd3_set_flags (&stream, XD3_FLUSH);
+  }
+  xd3_avail_input (&stream, indata, insize);
+process:
+  ret = xd3_xxcode_input (&stream);
+  switch (ret) {
+  case XD3_INPUT:
+    continue;
+  case XD3_OUTPUT:
+    /* write data */
+    goto process;
+  case XD3_GETSRCBLK:
+    /* set source block */
+    goto process;
+  case XD3_GOTHEADER:
+  case XD3_WINSTART:
+  case XD3_WINFINISH:
+    /* no action necessary */
+    goto process;
+  default:
+    /* error */
+  }
+} while (! reached_EOF);
+</pre>
+</div>
+
+<p>
+All that remains is to close the stream and free its resources.  The
+<code>xd3_close_stream()</code> checks several error conditions but otherwise
+involves no input or output.  The <code>xd3_free_stream()</code> routine frees
+all memory allocated by the stream.</p>
+
+<h1>misc</h1>
+
+<p>
+There are two convenience functions for encoding to and decoding from
+in-memory buffers.  See the <code>xd3_encode_completely</code> and
+<code>xd3_decode_completely</code> interfaces.</p>
+
+<p>
+There are two routines to get and set the application header.  When
+encoding, the application header must be set before the first
+<code>XD3_WINSTART</code>.  When decoding, the application header is available
+after the first <code>XD3_GOTHEADER</code>.</p>
+
+</td>
+</tr>
+</table>
+</body>
+</html>
diff --git a/xdelta3/www/xdelta3-cmdline.html b/xdelta3/www/xdelta3-cmdline.html
new file mode 100755
index 0000000..cabb547
--- /dev/null
+++ b/xdelta3/www/xdelta3-cmdline.html
@@ -0,0 +1,166 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+  <title>Xdelta3 command-line guide (BETA)</title>
+  <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
+  <link rel="stylesheet" type="text/css" href="xdelta3.css"/>
+</head>
+<body>
+
+<!-- $Format: "$WWWLeftNavBar$" $ -->
+<table cellpadding="20px" width=700> <tr> <td class="leftbdr" valign=top height=600 width=100> <div class="leftbody"> <h1>Xdelta</h1> <a href="xdelta3.html">overview</a><br> <a href="xdelta3-cmdline.html">command&nbsp;line</a><br> <a href="xdelta3-api-guide.html">api&nbsp;guide</a><br> <br><a href="http://xdelta.org">xdelta.org</a></h2> </div> </td> <td valign=top width=500>
+
+
+<!-- Copyright (C) 2003 and onward. Joshua P. MacDonald -->
+
+<h1>command-line guide</h1>
+
+<code>xdelta3</code> can be run with a syntax that resembles, but is not
+identical to, that of gzip: it requires you to specify the output file in most
+cases, rather than applying any default filename extensions.  These are cases
+that resemble gzip:<p>
+
+<div class="example">
+<pre>
+xdelta3 -c file_to_compress > delta_file
+xdelta3 -dc delta_file > file_uncompressed
+</pre>
+</div>
+<p>
+
+The <code>-c</code> option says to write to the standard output.  The
+<code>-d</code> option says to decode.  The default action is to encode (also
+specified by <code>-e</code>).  <code>xdelta3</code> also supports long
+command names; these two commands are equivalent to the ones above:<p>
+
+<div class="example">
+<pre>
+xdelta3 encode file_to_compress > delta_file
+xdelta3 decode delta_file > file_uncompressed
+</pre>
+</div>
+<p>
+
+<code>xdelta3</code> has the notion of a default filename for decoding.  If
+you specified a file name during the encode step, it is used as the default
+for decoding.  The <code>-s</code> option specifies a <em>source file</em> for
+delta-compression.<p>
+
+<div class="example">
+<pre>
+xdelta3 -s source_file target_file delta_file
+xdelta3 -d delta_file
+</pre>
+</div>
+<p>
+
+The second line above fills in "source_file" and "target_file" as the input
+and output filenames.  Without the <code>-f</code> option,
+<code>xdelta3</code> will not overwrite an existing file.  When there are no
+default filenames (e.g., in decode), standard input and standard output are
+used.  In the example below, the default source filename is applied in
+decoding.
+<p>
+
+<div class="example">
+<pre>
+cat target_file | xdelta3 -s source_file > delta_file
+xdelta3 -d < delta_file > target_file.1
+</pre>
+</div>
+<p>
+
+<code>xdelta3</code> recognizes externally compressed inputs, so the following
+command produces the expected results:<p>
+
+<div class="example">
+<pre>
+xdelta3 -s beta2.tar.gz beta3.tar.gz beta3.tar.gz.xd
+xdelta3 -ds beta2.tar.gz beta3.tar.gz.xd beta3.tar.gz.1
+</pre>
+</div>
+<p>
+
+You can avoid the intermediate file and use <code>xdelta3</code> together
+with a <code>tar</code>-pipeline.
+
+<div class="example">
+<pre>
+tar -cf - beta3 | xdelta3 -s beta2.tar > beta3.tar.xd
+xdelta3 -d beta3.tar.xd | tar -xf -
+</pre>
+</div>
+<p>
+
+<code>xdelta3</code> can print various information about a compressed file with
+the "printhdr" command.  The "printhdrs" command prints information about each
+<em>window</em> of the encoding.  The "printdelta" command prints the actual
+encoding for each window, in human-readable format.<p>
+
+<div class="example">
+<pre>
+# xdelta3 printdelta delta_file
+VCDIFF version:               0
+VCDIFF header size:           5
+VCDIFF header indicator:      none
+VCDIFF secondary compressor:  none
+VCDIFF window number:         0
+VCDIFF window indicator:      VCD_SOURCE VCD_ADLER32 
+VCDIFF adler32 checksum:      48BFADB6
+VCDIFF copy window length:    2813
+VCDIFF copy window offset:    0
+VCDIFF delta encoding length: 93
+VCDIFF target window length:  2903
+VCDIFF data section length:   72
+VCDIFF inst section length:   8
+VCDIFF addr section length:   3
+  Offset Code Type1 Size1 @Addr1 + Type2 Size2 @Addr2
+  000000 019  CPY_0 1535 @0     
+  001535 001  ADD     72        
+  001607 019  CPY_0 1296 @1517  
+</pre>
+</div>
+<br>
+<p>
+
+
+<h1>xdelta3 -h</h1>
+
+<pre>
+usage: xdelta3 [command/options] [input [output]]
+commands are:
+    encode      encodes the input
+    decode      decodes the input
+    config      prints xdelta3 configuration
+    test        run the builtin tests
+special commands for VCDIFF inputs:
+    printhdr    print information about the first window
+    printhdrs   print information about all windows
+    printdelta  print information about the entire delta
+options are:
+   -c           use stdout instead of default
+   -d           same as decode command
+   -e           same as encode command
+   -f           force overwrite
+   -n           disable checksum (encode/decode)
+   -D           disable external decompression (encode/decode)
+   -R           disable external recompression (decode)
+   -N           disable small string-matching compression
+   -S [djw|fgk] disable/enable secondary compression
+   -A [apphead] disable/provide application header
+   -s source    source file to copy from (if any)
+   -B blksize   source file block size
+   -W winsize   input window buffer size
+   -v           be verbose (max 2)
+   -q           be quiet
+   -h           show help
+   -V           show version
+   -P           repeat count (for profiling)
+</pre>
+<p>
+
+</td>
+</tr>
+</table>
+
+</body>
+</html>
diff --git a/xdelta3/www/xdelta3.css b/xdelta3/www/xdelta3.css
new file mode 100755
index 0000000..269b1c9
--- /dev/null
+++ b/xdelta3/www/xdelta3.css
@@ -0,0 +1,69 @@
+/* Page defaults: grey background, blue serif text. */
+body {
+  margin-top: 15px;
+  margin-left: 15px;
+  background-color:#b0b0b0;
+  color:#204080;
+  font-family: serif;
+  word-spacing: 0.5pt;
+  text-indent: 0pt;
+}
+
+/* Link colours for the page body. */
+A:visited {
+  color: #204080;
+}
+A:link {
+  color: #102040;
+}
+
+/* Headings. */
+h1 {
+  color: #103060;
+  font-size: 150%;
+}
+
+h2 {
+  color: #103060;
+  font-size: 80%;
+}
+
+/* Inline code and preformatted blocks share a monospace face but use
+   different colours. */
+code, pre {
+  font-family: monospace;
+}
+
+pre {
+  color: #102040;
+}
+
+code {
+  color:#0060c0;
+}
+
+/* Bordered, darker box wrapped around the <pre> examples. */
+.example {
+  margin-right: 20px;
+  margin-left: 20px;
+
+  padding-left: 20px;
+  padding-right: 20px;
+  padding-top: 0px;
+  padding-bottom: 0px;
+
+  background-color: #808080;
+  border-style: solid;
+  border-width: 1px;
+  border-color: #000000;
+}
+
+/* Left-hand navigation cell. */
+.leftbdr {
+  font-family: sans-serif;
+  color: #103060;
+  background-color: #606060;
+  border-style: solid;
+  border-width: 1px;
+  border-color: #000000;
+}
+
+/* Navigation links: one colour whether visited or not, no underline. */
+.leftbody A:visited {
+  color: #102040;
+  text-decoration: none;
+}
+.leftbody A:link {
+  color: #102040;
+  text-decoration: none;
+}
diff --git a/xdelta3/www/xdelta3.html b/xdelta3/www/xdelta3.html
new file mode 100755
index 0000000..3bddfd9
--- /dev/null
+++ b/xdelta3/www/xdelta3.html
@@ -0,0 +1,89 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+  <title>Xdelta3 delta compression library (BETA)</title>
+  <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
+  <link rel="stylesheet" type="text/css" href="xdelta3.css"/>
+</head>
+<body>
+
+<!-- $Format: "$WWWLeftNavBar$" $ -->
+<table cellpadding="20px" width=700> <tr> <td class="leftbdr" valign=top height=600 width=100> <div class="leftbody"> <h1>Xdelta</h1> <a href="xdelta3.html">overview</a><br> <a href="xdelta3-cmdline.html">command&nbsp;line</a><br> <a href="xdelta3-api-guide.html">api&nbsp;guide</a><br> <br><a href="http://xdelta.org">xdelta.org</a> </div> </td> <td valign=top width=500>
+
+<!-- Copyright (C) 2003 and onward. Joshua P. MacDonald -->
+
+<h1>version three?</h1>
+
+Xdelta3 is the third and latest release of Xdelta, which is a set of tools and
+APIs for reading and writing compressed <em>deltas</em>.  Deltas encode the
+differences between two versions of a document.  This release features a
+completely new compression engine, several algorithmic improvements, a fully
+programmable interface modelled after zlib, in addition to a command-line
+utility, use of the RFC3284 (VCDIFF) encoding, a python extension, and now
+64-bit support.<p>
+
+Xdelta3 is <em>tiny</em>.  A minimal, fully functional VCDIFF decoder library
+pipes in at 16KB.  The command-line utility complete with encoder/decoder
+tools, external compression support, and the <code>djw</code> secondary
+compression routines, is just under 60KB, slightly larger than a
+<code>gzip</code> executable.<p>
+
+Xdelta3 has few dependencies because it's capable of stand-alone file
+compression (i.e., what zlib and gzip do).  The stand-alone compression of
+Xdelta3/VCDIFF is 10-20% worse than <code>gzip</code>; you may view this as
+paying for the convenience-cost of having a single encoding, tool, and api
+designed to do both <em>data-compression</em> and <em>differencing</em> at
+once.<p>
+
+The Xdelta3 command-line tool, <code>xdelta3</code>, supports several
+convenience routines.  Delta compression works when the two inputs are
+similar, but often we would like to compute the difference between two
+compressed documents.  <code>xdelta3</code> has (optional) support to
+recognize externally compressed inputs and process them correctly.  This
+support is facilitated, in part, using the VCDIFF <em>application header</em>
+field to store <code>xdelta3</code> meta-data, which includes the original
+file names (if any) and codes to indicate whether the inputs were externally
+compressed.  Applications may provide their own application header.<p>
+
+<h1>what are version one and version two?</h1>
+
+Many shortcomings in the Xdelta1.x release are fixed in its replacement,
+Xdelta3.  Xdelta1 used both a simplistic compression algorithm and a
+simplistic encoding.  For example, Xdelta1 compresses the entire document at
+once and thus uses memory proportional to the input size.<p>
+
+The Xdelta1 compression engine made no attempt to find matching strings
+smaller than say 16 or 32 bytes, and the encoding does not attempt to
+efficiently encode the <code>COPY</code> and <code>ADD</code> instructions
+which constitute a delta.  For documents with highly similar data, however,
+these techniques degrade performance by a relatively insignificant amount.
+(Xdelta1.x compresses the delta with Zlib to improve matters, but this
+dependency stinks.)<p>
+
+Despite leaving much to be desired, Xdelta1 showed that you can do well
+without great complexity; as it turns out, the particulars of the compression
+engine are relatively insignificant compared to the difficulty of
+programming an application that uses delta-compression.  Better solve that
+first.<p>
+
+What we want are <em>systems</em> that manage compressed storage and network
+communication.  The second major release, Xdelta2, addresses these issues.
+Xdelta2 features a storage interface -- part database and part file system --
+which allows indexing and labeling compressed documents.  The feature set is
+similar to RCS.  The Xdelta2 interface supports efficient algorithms for
+<em>extracting</em> deltas between any pair of versions in storage.  The
+extraction technique also does not rely on hierarchy or centralizing the
+namespace, making the techniques ideal for peer-to-peer communication and
+proxy architectures.  I am grateful to Mihut Ionescu for implementing the
+Xproxy HTTP delta-compressing proxy system based on this interface and
+studying the benefits of delta-compression in that context.  Xdelta2 stressed
+the Xdelta1 compression engine beyond its limits; so Xdelta3 is designed as
+the ideal replacement.  The Xdelta2 techniques are yet to be ported to the new
+implementation.<p>
+
+</td>
+</tr>
+</table>
+
+</body>
+</html>
diff --git a/xdelta3/xdelta3-cfgs.h b/xdelta3/xdelta3-cfgs.h
new file mode 100755
index 0000000..329f3e9
--- /dev/null
+++ b/xdelta3/xdelta3-cfgs.h
@@ -0,0 +1,118 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2001 and onward.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/******************************************************************************************
+ SOFT string matcher
+ ******************************************************************************************/
+
+#if XD3_BUILD_SOFT
+
+#define TEMPLATE      soft
+#define LLOOK         stream->large_look
+#define LSTEP         stream->large_step
+#define SLOOK         stream->small_look
+#define SCHAIN        stream->small_chain
+#define SLCHAIN       stream->small_lchain
+#define SSMATCH       stream->ssmatch
+#define TRYLAZY       stream->try_lazy
+#define MAXLAZY       stream->max_lazy
+#define LONGENOUGH    stream->long_enough
+#define PROMOTE       stream->promote
+
+#define SOFTCFG 1
+#include "xdelta3.c"
+#undef  SOFTCFG
+
+#undef  TEMPLATE
+#undef  LLOOK
+#undef  SLOOK
+#undef  LSTEP
+#undef  SCHAIN
+#undef  SLCHAIN
+#undef  SSMATCH
+#undef  TRYLAZY
+#undef  MAXLAZY
+#undef  LONGENOUGH
+#undef  PROMOTE
+#endif
+
+#define SOFTCFG 0
+
+/******************************************************************************************
+ FAST string matcher
+ ******************************************************************************************/
+#if XD3_BUILD_FAST
+#define TEMPLATE      fast
+#define LLOOK         32
+#define LSTEP         32
+#define SLOOK         4
+
+#define SCHAIN        2 // For testcase/3, this produces miserable performance
+#define SLCHAIN       2 // with these values != 1 and large input window size
+
+#define SSMATCH       1
+#define TRYLAZY       0
+#define MAXLAZY       0
+#define LONGENOUGH    64
+#define PROMOTE       0
+
+#include "xdelta3.c"
+
+#undef  TEMPLATE
+#undef  LLOOK
+#undef  SLOOK
+#undef  LSTEP
+#undef  SCHAIN
+#undef  SLCHAIN
+#undef  SSMATCH
+#undef  TRYLAZY
+#undef  MAXLAZY
+#undef  LONGENOUGH
+#undef  PROMOTE
+#endif
+
+/******************************************************************************************
+ SLOW string matcher
+ ******************************************************************************************/
+#if XD3_BUILD_SLOW
+#define TEMPLATE      slow
+#define LLOOK         64
+#define LSTEP         64 // TODO
+#define SLOOK         4
+#define SCHAIN        128
+#define SLCHAIN       16
+#define SSMATCH       0
+#define TRYLAZY       1
+#define MAXLAZY       8
+#define LONGENOUGH    128
+#define PROMOTE       0
+
+#include "xdelta3.c"
+
+#undef  TEMPLATE
+#undef  LLOOK
+#undef  SLOOK
+#undef  LSTEP
+#undef  SCHAIN
+#undef  SLCHAIN
+#undef  SSMATCH
+#undef  TRYLAZY
+#undef  MAXLAZY
+#undef  LONGENOUGH
+#undef  PROMOTE
+#endif
diff --git a/xdelta3/xdelta3-djw.h b/xdelta3/xdelta3-djw.h
new file mode 100755
index 0000000..90f58e2
--- /dev/null
+++ b/xdelta3/xdelta3-djw.h
@@ -0,0 +1,1917 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2002 and onward.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _XDELTA3_DJW_H_
+#define _XDELTA3_DJW_H_
+
+/* The following people deserve much credit for the algorithms and techniques contained in
+ * this file:
+
+ Julian Seward
+ Bzip2 sources, implementation of the multi-table Huffman technique.
+
+ Jean-loup Gailly and Mark Adler and L. Peter Deutsch
+ Zlib source code, RFC 1951
+
+ Daniel S. Hirschberg and Debra A. LeLewer
+ "Efficient Decoding of Prefix Codes"
+ Communications of the ACM, April 1990 33(4).
+
+ David J. Wheeler
+ Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps.
+ This contains the idea behind the multi-table Huffman and 1-2 coding techniques.
+ ftp://ftp.cl.cam.ac.uk/users/djw3/
+
+*/
+
+/* OPT: during the multi-table iteration, pick the worst-overall performing table and
+ * replace it with exactly the frequencies of the worst-overall performing sector or
+ * N-worst performing sectors. */
+
+/* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with the Bzip prefix coding
+ * strategy.  xdfs-0.256 contains the last of the other-format tests, including RFC1950
+ * and the RFC1950+MTF tests. */
+
+#define DJW_MAX_CODELEN      20 /* Maximum length of an alphabet code (the CLEN MTF seed tables list only 1-20). */
+
+#define DJW_TOTAL_CODES      (DJW_MAX_CODELEN+2) /* [RUN_0, RUN_1, 1-DJW_MAX_CODELEN] */
+
+#define RUN_0                0 /* Symbols used in MTF+1/2 coding. */
+#define RUN_1                1
+
+#define DJW_BASIC_CODES      5  /* Number of code lengths always encoded (djw_encode_basic array) */
+#define DJW_RUN_CODES        2  /* Number of run codes */
+#define DJW_EXTRA_12OFFSET   7  /* Offset of extra codes */
+#define DJW_EXTRA_CODES      15 /* Number of optionally encoded code lengths (djw_encode_extra array) */
+#define DJW_EXTRA_CODE_BITS  4  /* Number of bits to code [0-DJW_EXTRA_CODES] */
+
+#define DJW_MAX_GROUPS       8  /* Max number of group coding tables */
+#define DJW_GROUP_BITS       3  /* Number of bits to code [1-DJW_MAX_GROUPS] */
+
+#define DJW_SECTORSZ_MULT     5  /* Multiplier for encoded sectorsz */
+#define DJW_SECTORSZ_BITS     5  /* Number of bits to code group size */
+#define DJW_SECTORSZ_MAX      ((1 << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT)
+
+#define DJW_MAX_ITER         6  /* Maximum number of iterations to find group tables. */
+#define DJW_MIN_IMPROVEMENT  20 /* Minimum number of bits an iteration must reduce coding by. */
+
+#define DJW_MAX_CLCLEN       15 /* Maximum code length of a prefix code length */
+#define DJW_CLCLEN_BITS      4  /* Number of bits to code [0-DJW_MAX_CLCLEN] */
+
+#define DJW_MAX_GBCLEN       7  /* Maximum code length of a group selector */
+#define DJW_GBCLEN_BITS      3  /* Number of bits to code [0-DJW_MAX_GBCLEN]
+				 * @!@ Actually, should never have zero code lengths here, or
+				 * else a group went unused.  Write a test for this: if a group
+				 * goes unused, eliminate it? */
+
+#define EFFICIENCY_BITS      16 /* It has to save at least this many bits... */
+
+typedef struct _djw_stream   djw_stream;
+typedef struct _djw_heapen   djw_heapen;
+typedef struct _djw_prefix   djw_prefix;
+typedef uint32_t             djw_weight;
+
+/* To enable Huffman tuning code... */
+#ifndef TUNE_HUFFMAN
+#define TUNE_HUFFMAN 0
+#endif
+
+#if TUNE_HUFFMAN == 0
+#define xd3_real_encode_huff xd3_encode_huff
+#define IF_TUNE(x)
+#define IF_NTUNE(x) x
+#else
+static uint xd3_bitsof_output (xd3_output *output, bit_state *bstate);
+#define IF_TUNE(x) x
+#define IF_NTUNE(x)
+static djw_weight tune_freq[DJW_TOTAL_CODES];
+static uint8_t tune_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
+static usize_t  tune_prefix_bits;
+static usize_t  tune_select_bits;
+static usize_t  tune_encode_bits;
+#endif
+struct _djw_heapen
+{
+  uint32_t depth;
+  uint32_t freq;
+  uint32_t parent;
+};
+
+struct _djw_prefix
+{
+  usize_t   scount;
+  uint8_t *symbol;
+  usize_t   mcount;
+  uint8_t *mtfsym;
+  uint8_t *repcnt;
+};
+
+struct _djw_stream
+{
+  int unused;
+};
+
+/* Each Huffman table consists of 256 "code length" (CLEN) codes, which are themselves
+ * Huffman coded after eliminating repeats and move-to-front coding.  The prefix consists
+ * of all the CLEN codes in djw_encode_12basic plus a 4-bit value stating how many of the
+ * djw_encode_12extra codes are actually coded (the rest are presumed zero, or unused CLEN
+ * codes).
+ *
+ * The values of these two arrays were arrived at by studying the distribution of min
+ * and max clen over a collection of DATA, INST, and ADDR inputs.  The goal is to specify
+ * the order of djw_encode_12extra that is most likely to minimize the number of extra
+ * codes that must be encoded.
+ *
+ * Results: 158896 sections were counted by compressing files (window size 512K) listed
+ * with: `find / -type f ( -user jmacd -o -perm +444 )`
+ *
+ * The distribution of CLEN codes for each efficient invocation of the secondary
+ * compressor (taking the best number of groups/sector size) was recorded.  Then we look at
+ * the distribution of min and max clen values, counting the number of times the value
+ * C_low is less than the min and C_high is greater than the max.  Values >= C_high and <=
+ * C_low will not have their lengths coded.  The results are sorted and the least likely
+ * 15 are placed into the djw_encode_12extra[] array in order.  These values are used as
+ * the initial MTF ordering.
+
+ clow[1] = 155119
+ clow[2] = 140325
+ clow[3] = 84072
+ ---
+ clow[4] = 7225
+ clow[5] = 1093
+ clow[6] = 215
+ ---
+ chigh[4] = 1
+ chigh[5] = 30
+ chigh[6] = 218
+ chigh[7] = 2060
+ chigh[8] = 13271
+ ---
+ chigh[9] = 39463
+ chigh[10] = 77360
+ chigh[11] = 118298
+ chigh[12] = 141360
+ chigh[13] = 154086
+ chigh[14] = 157967
+ chigh[15] = 158603
+ chigh[16] = 158864
+ chigh[17] = 158893
+ chigh[18] = 158895
+ chigh[19] = 158896
+ chigh[20] = 158896
+
+*/
+
+static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] =
+  {
+    9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20
+  };
+
+static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] =
+  {
+    4, 5, 6, 7, 8,
+  };
+
+/*********************************************************************/
+/*                              DECLS                                */
+/*********************************************************************/
+
+static djw_stream*     djw_alloc           (xd3_stream *stream /*, int alphabet_size */);
+static void            djw_init            (djw_stream *h);
+static void            djw_destroy         (xd3_stream *stream,
+					    djw_stream *h);
+
+#if XD3_ENCODER
+static int             xd3_encode_huff     (xd3_stream   *stream,
+					    djw_stream  *sec_stream,
+					    xd3_output   *input,
+					    xd3_output   *output,
+					    xd3_sec_cfg  *cfg);
+#endif
+
+static int             xd3_decode_huff     (xd3_stream     *stream,
+					    djw_stream    *sec_stream,
+					    const uint8_t **input,
+					    const uint8_t  *const input_end,
+					    uint8_t       **output,
+					    const uint8_t  *const output_end);
+
+/*********************************************************************/
+/*                             HUFFMAN                               */
+/*********************************************************************/
+
+static djw_stream*
+djw_alloc (xd3_stream *stream)
+{
+  return xd3_alloc (stream, sizeof (djw_stream), 1);
+}
+
+static void
+djw_init (djw_stream *h)
+{
+  /* Fields are initialized prior to use. */
+}
+
+static void
+djw_destroy (xd3_stream *stream,
+	     djw_stream *h)
+{
+  xd3_free (stream, h);
+}
+
+
+/*********************************************************************/
+/*                               HEAP                                */
+/*********************************************************************/
+
+static INLINE int
+heap_less (const djw_heapen *a, const djw_heapen *b)
+{
+  return a->freq   < b->freq ||
+    (a->freq  == b->freq &&
+     a->depth  < b->depth);
+}
+
+static INLINE void
+heap_insert (uint *heap, const djw_heapen *ents, uint p, const uint e)
+{
+  /* Insert ents[e] into next slot heap[p] */
+  uint pp = p/2; /* P's parent */
+
+  while (heap_less (& ents[e], & ents[heap[pp]]))
+    {
+      heap[p] = heap[pp];
+      p  = pp;
+      pp = p/2;
+    }
+
+  heap[p] = e;
+}
+
+static INLINE djw_heapen*
+heap_extract (uint *heap, const djw_heapen *ents, uint heap_last)
+{
+  uint smallest = heap[1];
+  uint p, pc, t;
+
+  /* Caller decrements heap_last, so heap_last+1 is the replacement elt. */
+  heap[1] = heap[heap_last+1];
+
+  /* Re-heapify */
+  for (p = 1; ; p = pc)
+    {
+      pc = p*2;
+
+      /* Reached bottom of heap */
+      if (pc > heap_last) { break; }
+
+      /* See if second child is smaller. */
+      if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]])) { pc += 1; }
+
+      /* If pc is not smaller than p, heap property re-established. */
+      if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; }
+
+      t = heap[pc];
+      heap[pc] = heap[p];
+      heap[p] = t;
+    }
+
+  return (djw_heapen*) & ents[smallest];
+}
+
+#if XD3_DEBUG
+static void
+heap_check (uint *heap, djw_heapen *ents, uint heap_last)
+{
+  uint i;
+  for (i = 1; i <= heap_last; i += 1)
+    {
+      /* Heap property: child not less than parent */
+      XD3_ASSERT (! heap_less (& ents[heap[i]], & ents[heap[i/2]]));
+    }
+}
+#endif
+
+/*********************************************************************/
+/*                             MTF, 1/2                              */
+/*********************************************************************/
+
+static INLINE usize_t
+djw_update_mtf (uint8_t *mtf, usize_t mtf_i)
+{
+  int k;
+  usize_t sym = mtf[mtf_i];
+
+  for (k = mtf_i; k != 0; k -= 1) { mtf[k] = mtf[k-1]; }
+
+  mtf[0] = sym;
+  return sym;
+}
+
+static INLINE void
+djw_update_1_2 (int *mtf_run, usize_t *mtf_i, uint8_t *mtfsym, djw_weight *freq)
+{
+  int code;
+  
+  do
+    {
+      /* Offset by 1, since any number of RUN_ symbols implies run>0... */
+      *mtf_run -= 1;
+
+      code = (*mtf_run & 1) ? RUN_1 : RUN_0;
+
+      mtfsym[(*mtf_i)++] = code;
+      freq[code] += 1;
+      *mtf_run >>= 1;
+    }
+  while (*mtf_run >= 1);
+
+  *mtf_run = 0;
+}
+
+static void
+djw_init_clen_mtf_1_2 (uint8_t *clmtf)
+{
+  int i, cl_i = 0;
+
+  clmtf[cl_i++] = 0;
+  for (i = 0; i < DJW_BASIC_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12basic[i]; }
+  for (i = 0; i < DJW_EXTRA_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12extra[i]; }
+}
+
+/*********************************************************************/
+/*                           PREFIX CODES                            */
+/*********************************************************************/
+#if XD3_ENCODER
+/* Build length-limited Huffman code lengths for the ASIZE symbols weighted by FREQ,
+ * storing them in CLEN (0 for symbols that receive no code).  When a length exceeds
+ * MAXLEN the weights in use are halved and the tree is rebuilt.  Returns the total
+ * number of bits needed to code the input described by FREQ with these lengths. */
+static usize_t
+djw_build_prefix (const djw_weight *freq, uint8_t *clen, int asize, int maxlen)
+{
+  /* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1 internal nodes,
+   * never more than ALPHABET_SIZE entries actually in the heap (minimum weight subtrees
+   * during prefix construction).  First ALPHABET_SIZE entries are the actual symbols,
+   * next ALPHABET_SIZE-1 are internal nodes. */
+  djw_heapen ents[ALPHABET_SIZE * 2];
+  uint        heap[ALPHABET_SIZE + 1];
+  uint heap_last; /* Index of the last _valid_ heap entry. */
+  uint ents_size; /* Number of entries, including 0th fake entry */
+  int  overflow;  /* Number of code lengths that overflow */
+  uint32_t total_bits;
+  int i;
+
+  IF_DEBUG (uint32_t first_bits = 0);
+
+  /* Insert real symbol frequencies. */
+  for (i = 0; i < asize; i += 1) { ents[i+1].freq = freq[i]; }
+
+ again:
+  /* The loop is re-entered each time an overflow occurs.  Re-initialize... */
+  heap_last = 0;
+  ents_size = 1;
+  overflow  = 0;
+  total_bits = 0;
+
+  /* 0th entry terminates the while loop in heap_insert (it's the parent of the smallest
+   * element, always less-than) */
+  heap[0] = 0;
+  ents[0].depth = 0;
+  ents[0].freq  = 0;
+
+  /* Initial heap. */
+  for (i = 0; i < asize; i += 1, ents_size += 1)
+    {
+      ents[ents_size].depth  = 0;
+      ents[ents_size].parent = 0;
+
+      if (ents[ents_size].freq != 0)
+	{
+	  heap_insert (heap, ents, ++heap_last, ents_size);
+	}
+    }
+
+  IF_DEBUG (heap_check (heap, ents, heap_last));
+
+  /* Must be at least one symbol, or else we can't get here. */
+  XD3_ASSERT (heap_last != 0);
+
+  /* If there is only one symbol, fake a second to prevent zero-length codes. */
+  if (unlikely (heap_last == 1))
+    {
+      /* Pick either the first or last symbol. */
+      int s = freq[0] ? asize-1 : 0;
+      ents[s+1].freq = 1;
+      goto again;
+    }
+
+  /* Build prefix tree. */
+  while (heap_last > 1)
+    {
+      djw_heapen *h1 = heap_extract (heap, ents, --heap_last);
+      djw_heapen *h2 = heap_extract (heap, ents, --heap_last);
+
+      ents[ents_size].freq   = h1->freq + h2->freq;
+      ents[ents_size].depth  = 1 + max (h1->depth, h2->depth);
+      ents[ents_size].parent = 0;
+
+      h1->parent = h2->parent = ents_size;
+
+      heap_insert (heap, ents, ++heap_last, ents_size++);
+
+      IF_DEBUG (heap_check (heap, ents, heap_last));
+    }
+
+  /* Now compute prefix code lengths, counting parents. */
+  for (i = 1; i < asize+1; i += 1)
+    {
+      int b = 0;
+
+      if (ents[i].freq != 0)
+	{
+	  int p = i;
+
+	  while ((p = ents[p].parent) != 0) { b += 1; }
+
+	  if (b > maxlen) { overflow = 1; }
+
+	  total_bits += b * freq[i-1];
+	}
+
+      /* clen is 0-origin, unlike ents. */
+      clen[i-1] = b;
+    }
+
+  IF_DEBUG (if (first_bits == 0) first_bits = total_bits);
+
+  if (! overflow)
+    {
+      IF_DEBUG (if (first_bits != total_bits)
+      {
+	P(RINT "code length overflow changed %d bits\n", total_bits - first_bits);
+      });
+      return total_bits;
+    }
+
+  /* OPT: There is a non-looping way to fix overflow shown in zlib, but this is easier
+   * (for now), as done in bzip2.  Only symbols already in use are rescaled: a zero
+   * frequency must stay zero or every unused symbol would be handed a code. */
+  for (i = 1; i < asize+1; i += 1)
+    {
+      if (ents[i].freq != 0) { ents[i].freq = ents[i].freq / 2 + 1; }
+    }
+
+  goto again;
+}
+
+static void
+djw_build_codes (uint *codes, const uint8_t *clen, int asize DEBUG_ARG (int abs_max))
+{
+  int i, l;
+  int min_clen = DJW_MAX_CODELEN;
+  int max_clen = 0;
+  uint code = 0;
+
+  for (i = 0; i < asize; i += 1)
+    {
+      if (clen[i] > 0 && clen[i] < min_clen)
+	{
+	  min_clen = clen[i];
+	}
+
+      max_clen = max (max_clen, (int) clen[i]);
+    }
+
+  XD3_ASSERT (max_clen <= abs_max);
+
+  for (l = min_clen; l <= max_clen; l += 1)
+    {
+      for (i = 0; i < asize; i += 1)
+	{
+	  if (clen[i] == l) { codes[i] = code++; }
+	}
+
+      code <<= 1;
+    }
+}
+
+/*********************************************************************/
+/*			      MOVE-TO-FRONT                          */
+/*********************************************************************/
+static void
+djw_compute_mtf_1_2 (djw_prefix *prefix,
+		     uint8_t     *mtf,
+		     djw_weight *freq_out,   /* freak out! */
+		     usize_t       nsym)
+{
+  int i, j, k;
+  usize_t sym;
+  usize_t size = prefix->scount;
+  usize_t mtf_i = 0;
+  int mtf_run = 0;
+
+  memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+1));
+
+  for (i = 0; i < size; )
+    {
+      /* OPT: Bzip optimizes this algorithm a little by effectively checking j==0 before
+       * the MTF update. */
+      sym = prefix->symbol[i++];
+
+      for (j = 0; mtf[j] != sym; j += 1) { }
+
+      XD3_ASSERT (j < nsym);
+
+      for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; }
+
+      mtf[0] = sym;
+
+      if (j == 0)
+	{
+	  mtf_run += 1;
+	  continue;
+	}
+
+      if (mtf_run > 0)
+	{
+	  djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
+	}
+
+      /* Non-zero symbols are offset by RUN_1 */
+      prefix->mtfsym[mtf_i++] = j+RUN_1;
+      freq_out[j+RUN_1] += 1;
+    }
+
+  if (mtf_run > 0)
+    {
+      djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
+    }
+
+  prefix->mcount = mtf_i;
+}
+
+/* Zero FREQ[0..ALPHABET_SIZE) and count every byte on the INPUT page chain into it.
+ * Returns the total number of bytes seen.  OPT: can be accomplished beforehand. */
+static usize_t
+djw_count_freqs (djw_weight *freq, xd3_output *input)
+{
+  xd3_output  *in;
+  usize_t       size = 0;
+
+  memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE);
+
+  for (in = input; in; in = in->next_page)
+    {
+      const uint8_t *p     = in->base;
+      const uint8_t *p_max = p + in->next;
+
+      size += in->next;
+      /* Test before reading, so a page with next == 0 contributes nothing. */
+      while (p < p_max) { freq[*p++] += 1; }
+    }
+
+  IF_DEBUG1 ({int i;
+  P(RINT "freqs: ");
+  for (i = 0; i < ALPHABET_SIZE; i += 1) { P(RINT "%u ", freq[i]); }
+  P(RINT "\n");});
+
+  return size;
+}
+
+static void
+djw_compute_multi_prefix (int          groups,
+			  uint8_t      clen[DJW_MAX_GROUPS][ALPHABET_SIZE],
+			  djw_prefix *prefix)
+{
+  int gp, i;
+      
+  prefix->scount = ALPHABET_SIZE;
+  memcpy (prefix->symbol, clen[0], ALPHABET_SIZE);
+
+  for (gp = 1; gp < groups; gp += 1)
+    {
+      for (i = 0; i < ALPHABET_SIZE; i += 1)
+	{
+	  if (clen[gp][i] == 0)
+	    {
+	      continue;
+	    }
+
+	  prefix->symbol[prefix->scount++] = clen[gp][i];
+	}
+    }
+}
+
+static void
+djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq)
+{
+  uint8_t clmtf[DJW_MAX_CODELEN+1];
+
+  djw_init_clen_mtf_1_2 (clmtf);
+
+  djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN+1);
+}
+
+static int
+djw_encode_prefix (xd3_stream    *stream,
+		   xd3_output   **output,
+		   bit_state     *bstate,
+		   djw_prefix   *prefix)
+{
+  int ret, i;
+  uint num_to_encode;
+  djw_weight clfreq[DJW_TOTAL_CODES];
+  uint8_t    clclen[DJW_TOTAL_CODES];
+  uint       clcode[DJW_TOTAL_CODES];
+
+  IF_TUNE (memset (clfreq, 0, sizeof (clfreq)));
+
+  /* Move-to-front encode prefix symbols, count frequencies */
+  djw_compute_prefix_1_2 (prefix, clfreq);
+
+  /* Compute codes */
+  djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
+  djw_build_codes  (clcode, clclen, DJW_TOTAL_CODES DEBUG_ARG (DJW_MAX_CLCLEN));
+
+  /* Compute number of extra codes beyond basic ones for this template. */
+  num_to_encode = DJW_TOTAL_CODES;
+  while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0) { num_to_encode -= 1; }
+  XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS));
+
+  /* Encode: # of extra codes */
+  if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS,
+			      num_to_encode - DJW_EXTRA_12OFFSET))) { return ret; }
+
+  /* Encode: MTF code lengths */
+  for (i = 0; i < num_to_encode; i += 1)
+    {
+      if ((ret = xd3_encode_bits (stream, output, bstate, DJW_CLCLEN_BITS, clclen[i]))) { return ret; }
+    }
+
+  /* Encode: CLEN code lengths */
+  for (i = 0; i < prefix->mcount; i += 1)
+    {
+      usize_t mtf_sym = prefix->mtfsym[i];
+      usize_t bits    = clclen[mtf_sym];
+      usize_t code    = clcode[mtf_sym];
+
+      if ((ret = xd3_encode_bits (stream, output, bstate, bits, code))) { return ret; }
+    }
+
+  IF_TUNE (memcpy (tune_freq, clfreq, sizeof (clfreq)));
+
+  return 0;
+}
+
+static void
+djw_compute_selector_1_2 (djw_prefix *prefix,
+			  usize_t       groups,
+			  djw_weight *gbest_freq)
+{
+  uint8_t grmtf[DJW_MAX_GROUPS];
+  usize_t i;
+
+  for (i = 0; i < groups; i += 1) { grmtf[i] = i; }
+
+  djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups);
+}
+
+static int
+xd3_encode_howmany_groups (xd3_stream *stream,
+			   xd3_sec_cfg *cfg,
+			   usize_t input_size,
+			   usize_t *ret_groups,
+			   usize_t *ret_sector_size)
+{
+  usize_t cfg_groups = 0;
+  usize_t cfg_sector_size = 0;
+  usize_t sugg_groups = 0;
+  usize_t sugg_sector_size = 0;
+
+  if (cfg->ngroups != 0)
+    {
+      if (cfg->ngroups < 0 || cfg->ngroups > DJW_MAX_GROUPS)
+	{
+	  stream->msg = "invalid secondary encoder group number";
+	  return EINVAL;
+	}
+
+      cfg_groups = cfg->ngroups;
+    }
+
+  if (cfg->sector_size != 0)
+    {
+      if (cfg->sector_size < DJW_SECTORSZ_MULT || cfg->sector_size > DJW_SECTORSZ_MAX || (cfg->sector_size % DJW_SECTORSZ_MULT) != 0)
+	{
+	  stream->msg = "invalid secondary encoder sector size";
+	  return EINVAL;
+	}
+
+      cfg_sector_size = cfg->sector_size;
+    }
+
+  if (cfg_groups == 0 || cfg_sector_size == 0)
+    {
+      /* These values were found empirically using xdelta3-tune around version
+       * xdfs-0.256. */
+      switch (cfg->data_type)
+	{
+	case DATA_SECTION:
+	  if      (input_size < 1000)   { sugg_groups = 1; sugg_sector_size = 0; }
+	  else if (input_size < 4000)   { sugg_groups = 2; sugg_sector_size = 10; }
+	  else if (input_size < 7000)   { sugg_groups = 3; sugg_sector_size = 10; }
+	  else if (input_size < 10000)  { sugg_groups = 4; sugg_sector_size = 10; }
+	  else if (input_size < 25000)  { sugg_groups = 5; sugg_sector_size = 10; }
+	  else if (input_size < 50000)  { sugg_groups = 7; sugg_sector_size = 20; }
+	  else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; }
+	  else                          { sugg_groups = 8; sugg_sector_size = 70; }
+	  break;
+	case INST_SECTION:
+	  if      (input_size < 7000)   { sugg_groups = 1; sugg_sector_size = 0; }
+	  else if (input_size < 10000)  { sugg_groups = 2; sugg_sector_size = 50; }
+	  else if (input_size < 25000)  { sugg_groups = 3; sugg_sector_size = 50; }
+	  else if (input_size < 50000)  { sugg_groups = 6; sugg_sector_size = 40; }
+	  else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; }
+	  else                          { sugg_groups = 8; sugg_sector_size = 40; }
+	  break;
+	case ADDR_SECTION:
+	  if      (input_size < 9000)   { sugg_groups = 1; sugg_sector_size = 0; }
+	  else if (input_size < 25000)  { sugg_groups = 2; sugg_sector_size = 130; }
+	  else if (input_size < 50000)  { sugg_groups = 3; sugg_sector_size = 130; }
+	  else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; }
+	  else                          { sugg_groups = 7; sugg_sector_size = 130; }
+	  break;
+	}
+
+      if (cfg_groups == 0)
+	{
+	  cfg_groups = sugg_groups;
+	}
+
+      if (cfg_sector_size == 0)
+	{
+	  cfg_sector_size = sugg_sector_size;
+	}
+    }
+
+  if (cfg_groups != 1 && cfg_sector_size == 0)
+    {
+      switch (cfg->data_type)
+	{
+	case DATA_SECTION:
+	  cfg_sector_size = 20;
+	  break;
+	case INST_SECTION:
+	  cfg_sector_size = 50;
+	  break;
+	case ADDR_SECTION:
+	  cfg_sector_size = 130;
+	  break;
+	}
+    }
+
+  (*ret_groups)     = cfg_groups;
+  (*ret_sector_size) = cfg_sector_size;
+
+  XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS);
+  XD3_ASSERT (cfg_groups == 1 || (cfg_sector_size >= DJW_SECTORSZ_MULT && cfg_sector_size <= DJW_SECTORSZ_MAX));
+
+  return 0;
+}
+
+static int
+xd3_real_encode_huff (xd3_stream   *stream,
+		      djw_stream  *h,
+		      xd3_output   *input,
+		      xd3_output   *output,
+		      xd3_sec_cfg  *cfg)
+{
+  int         ret;
+  usize_t      groups, sector_size;
+  bit_state   bstate = BIT_STATE_ENCODE_INIT;
+  xd3_output *in;
+  int         encode_bits;
+  usize_t      input_bits;
+  usize_t      input_bytes;
+  usize_t      initial_offset = output->next;
+  djw_weight real_freq[ALPHABET_SIZE];
+  uint8_t    *gbest = NULL; /* Dynamic allocations: could put these in djw_stream. */
+  uint8_t    *gbest_mtf = NULL;
+
+  input_bytes = djw_count_freqs (real_freq, input);
+  input_bits  = input_bytes * 8;
+
+  XD3_ASSERT (input_bytes > 0);
+
+  if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes, & groups, & sector_size)))
+    {
+      return ret;
+    }
+
+  if (0)
+    {
+    regroup:
+      /* Sometimes we dynamically decide there are too many groups.  Arrive here. */
+      output->next = initial_offset;
+      xd3_bit_state_encode_init (& bstate);
+    }
+
+  /* Encode: # of groups (3 bits) */
+  if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GROUP_BITS, groups-1))) { goto failure; }
+
+  if (groups == 1)
+    {
+      /* Single Huffman group. */
+      uint        code[ALPHABET_SIZE]; /* Codes */
+      IF_TUNE  (uint8_t    *clen = tune_clen[0];)
+      IF_NTUNE (uint8_t     clen[ALPHABET_SIZE];)
+      uint8_t    prefix_mtfsym[ALPHABET_SIZE];
+      djw_prefix prefix;
+
+      encode_bits =
+	djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
+      djw_build_codes  (code, clen, ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN));
+
+      if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
+
+      /* Encode: prefix */
+      prefix.mtfsym = prefix_mtfsym;
+      prefix.symbol = clen;
+      prefix.scount = ALPHABET_SIZE;
+
+      if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; }
+
+      if (encode_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
+
+      IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate));
+      IF_TUNE (tune_select_bits = 0);
+      IF_TUNE (tune_encode_bits = encode_bits);
+
+      /* Encode: data */
+      for (in = input; in; in = in->next_page)
+	{
+	  const uint8_t *p     = in->base;
+	  const uint8_t *p_max = p + in->next;
+
+	  do
+	    {
+	      usize_t sym  = *p++;
+	      usize_t bits = clen[sym];
+
+	      IF_DEBUG (encode_bits -= bits);
+
+	      if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code[sym]))) { goto failure; }
+	    }
+	  while (p < p_max);
+	}
+
+      XD3_ASSERT (encode_bits == 0);
+    }
+  else
+    {
+      /* DJW Huffman */
+      djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE];
+#if TUNE_HUFFMAN == 0
+      uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
+#else
+#define evolve_clen tune_clen
+#endif
+      djw_weight left = input_bytes;
+      int gp;
+      int niter = 0;
+      usize_t select_bits;
+      usize_t sym1 = 0, sym2 = 0, s;
+      usize_t   gcost[DJW_MAX_GROUPS];
+      uint     gbest_code[DJW_MAX_GROUPS+1];
+      uint8_t  gbest_clen[DJW_MAX_GROUPS+1];
+      usize_t   gbest_max = 1 + (input_bytes - 1) / sector_size;
+      int      best_bits = 0;
+      usize_t   gbest_no;
+      usize_t   gpcnt;
+      const uint8_t *p;
+      IF_DEBUG1 (usize_t gcount[DJW_MAX_GROUPS]);
+
+      /* Encode: sector size (5 bits) */
+      if ((ret = xd3_encode_bits (stream, & output, & bstate,
+				  DJW_SECTORSZ_BITS, (sector_size/DJW_SECTORSZ_MULT)-1))) { goto failure; }
+
+      /* Dynamic allocation. */
+      if (gbest     == NULL) { gbest     = xd3_alloc (stream, gbest_max, 1); }
+      if (gbest_mtf == NULL) { gbest_mtf = xd3_alloc (stream, gbest_max, 1); }
+
+      /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */
+
+      /* Generate initial code length tables. */
+      for (gp = 0; gp < groups; gp += 1)
+	{
+	  djw_weight sum  = 0;
+	  djw_weight goal = left / (groups - gp);
+
+	  IF_DEBUG1 (usize_t nz = 0);
+
+	  /* Due to the single-code granularity of this distribution, it may be that we
+	   * can't generate a distribution for each group.  In that case subtract one
+	   * gropu and try again.  If (inefficient), we're testing group behavior, so
+	   * don't mess things up. */
+	  if (goal == 0 && !cfg->inefficient)
+	    {
+	      IF_DEBUG1 (P(RINT "too many groups (%u), dropping one\n", groups));
+	      groups -= 1;
+	      goto regroup;
+	    }
+
+	  /* Sum == goal is possible when (cfg->inefficient)... */
+	  while (sum < goal)
+	    {
+	      XD3_ASSERT (sym2 < ALPHABET_SIZE);
+	      IF_DEBUG1 (nz += real_freq[sym2] != 0);
+	      sum += real_freq[sym2++];
+	    }
+
+	  IF_DEBUG1(P(RINT "group %u has symbols %u..%u (%u non-zero) (%u/%u = %.3f)\n",
+			     gp, sym1, sym2, nz, sum, input_bytes, sum / (double)input_bytes););
+
+	  for (s = 0; s < ALPHABET_SIZE; s += 1)
+	    {
+	      evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16;
+	    }
+
+	  left -= sum;
+	  sym1  = sym2+1;
+	}
+
+    repeat:
+
+      niter += 1;
+      gbest_no = 0;
+      memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups);
+      IF_DEBUG1 (memset (gcount, 0, sizeof (gcount[0]) * groups));
+
+      /* For each input page (loop is irregular to allow non-pow2-size group size. */
+      in = input;
+      p  = in->base;
+
+      /* For each group-size sector. */
+      do
+	{
+	  const uint8_t *p0  = p;
+	  xd3_output    *in0 = in;
+	  usize_t best   = 0;
+	  usize_t winner = 0;
+
+	  /* Select best group for each sector, update evolve_freq. */
+	  memset (gcost, 0, sizeof (gcost[0]) * groups);
+
+	  /* For each byte in sector. */
+	  for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
+	    {
+	      /* For each group. */
+	      for (gp = 0; gp < groups; gp += 1)
+		{
+		  gcost[gp] += evolve_clen[gp][*p];
+		}
+
+	      /* Check end-of-input-page. */
+#             define GP_PAGE()                \
+	      if (++p - in->base == in->next) \
+		{                             \
+		  in = in->next_page;         \
+		  if (in == NULL) { break; }  \
+		  p  = in->base;              \
+		}
+
+	      GP_PAGE ();
+	    }
+
+	  /* Find min cost group for this sector */
+	  best = -1U;
+	  for (gp = 0; gp < groups; gp += 1)
+	    {
+	      if (gcost[gp] < best) { best = gcost[gp]; winner = gp; }
+	    }
+
+	  gbest[gbest_no++] = winner;
+	  IF_DEBUG1 (gcount[winner] += 1);
+
+	  p  = p0;
+	  in = in0;
+
+	  /* Update group frequencies. */
+	  for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
+	    {
+	      evolve_freq[winner][*p] += 1;
+
+	      GP_PAGE ();
+	    }
+	}
+      while (in != NULL);
+
+      XD3_ASSERT (gbest_no == gbest_max);
+
+      /* Recompute code lengths. */
+      encode_bits = 0;
+      for (gp = 0; gp < groups; gp += 1)
+	{
+	  int i;
+	  uint8_t evolve_zero[ALPHABET_SIZE];
+	  int any_zeros = 0;
+
+	  memset (evolve_zero, 0, sizeof (evolve_zero));
+
+	  /* Cannot allow a zero clen when the real frequency is non-zero.  Note: this
+	   * means we are going to encode a fairly long code for these unused entries.  An
+	   * improvement would be to implement a NOTUSED code for when these are actually
+	   * zero, but this requires another data structure (evolve_zero) since we don't
+	   * know when evolve_freq[i] == 0...  Briefly tested, looked worse. */
+	  for (i = 0; i < ALPHABET_SIZE; i += 1)
+	    {
+	      if (evolve_freq[gp][i] == 0 && real_freq[i] != 0)
+		{
+		  evolve_freq[gp][i] = 1;
+		  evolve_zero[i] = 1;
+		  any_zeros = 1;
+		}
+	    }
+
+	  encode_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp], ALPHABET_SIZE, DJW_MAX_CODELEN);
+
+	  /* The above faking of frequencies does not matter for the last iteration, but
+	   * we don't know when that is yet.  However, it also breaks the encode_bits
+	   * computation.  Necessary for accuracy, and for the (encode_bits==0) assert
+	   * after all bits are output. */
+	  if (any_zeros)
+	    {
+	      IF_DEBUG1 (usize_t save_total = encode_bits);
+
+	      for (i = 0; i < ALPHABET_SIZE; i += 1)
+		{
+		  if (evolve_zero[i]) { encode_bits -= evolve_clen[gp][i]; }
+		}
+
+	      IF_DEBUG1 (P(RINT "evolve_zero reduced %u bits in group %u\n", save_total - encode_bits, gp));
+	    }
+	}
+
+      IF_DEBUG1(
+		P(RINT "pass %u total bits: %u group uses: ", niter, encode_bits);
+		for (gp = 0; gp < groups; gp += 1) { P(RINT "%u ", gcount[gp]); }
+		P(RINT "\n"););
+
+      /* End iteration.  (The following assertion proved invalid.) */
+      /*XD3_ASSERT (niter == 1 || best_bits >= encode_bits);*/
+
+      IF_DEBUG1 (if (niter > 1 && best_bits < encode_bits) {
+	P(RINT "iteration lost %u bits\n", encode_bits - best_bits); });
+
+      if (niter == 1 || (niter < DJW_MAX_ITER && (best_bits - encode_bits) >= DJW_MIN_IMPROVEMENT))
+	{
+	  best_bits = encode_bits;
+	  goto repeat;
+	}
+
+      /* Efficiency check. */
+      if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
+
+      IF_DEBUG1 (P(RINT "djw compression: %u -> %0.3f\n", input_bytes, encode_bits / 8.0));
+
+      /* Encode: prefix */
+      {
+	uint8_t     prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE];
+	uint8_t     prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE];
+	uint8_t     prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE];
+	djw_prefix prefix;
+
+	prefix.symbol = prefix_symbol;
+	prefix.mtfsym = prefix_mtfsym;
+	prefix.repcnt = prefix_repcnt;
+
+	djw_compute_multi_prefix (groups, evolve_clen, & prefix);
+	if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; }
+      }
+
+      /* Encode: selector frequencies */
+      {
+	djw_weight gbest_freq[DJW_MAX_GROUPS+1];
+	djw_prefix gbest_prefix;
+	usize_t i;
+
+	gbest_prefix.scount = gbest_no;
+	gbest_prefix.symbol = gbest;
+	gbest_prefix.mtfsym = gbest_mtf;
+
+	djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq);
+
+	select_bits =
+	  djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN);
+	djw_build_codes  (gbest_code, gbest_clen, groups+1  DEBUG_ARG (DJW_MAX_GBCLEN));
+
+	IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate));
+	IF_TUNE (tune_select_bits = select_bits);
+	IF_TUNE (tune_encode_bits = encode_bits);
+
+	for (i = 0; i < groups+1; i += 1)
+	  {
+	    if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GBCLEN_BITS, gbest_clen[i]))) { goto failure; }
+	  }
+
+	for (i = 0; i < gbest_prefix.mcount; i += 1)
+	  {
+	    usize_t gp_mtf      = gbest_mtf[i];
+	    usize_t gp_sel_bits = gbest_clen[gp_mtf];
+	    usize_t gp_sel_code = gbest_code[gp_mtf];
+
+	    XD3_ASSERT (gp_mtf < groups+1);
+
+	    if ((ret = xd3_encode_bits (stream, & output, & bstate, gp_sel_bits, gp_sel_code))) { goto failure; }
+
+	    IF_DEBUG (select_bits -= gp_sel_bits);
+	  }
+
+	XD3_ASSERT (select_bits == 0);
+      }
+
+      /* Efficiency check. */
+      if (encode_bits + select_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
+
+      /* Encode: data */
+      {
+	uint evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE];
+	usize_t sector = 0;
+
+	/* Build code tables for each group. */
+	for (gp = 0; gp < groups; gp += 1)
+	  {
+	    djw_build_codes (evolve_code[gp], evolve_clen[gp], ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN));
+	  }
+
+	/* Now loop over the input. */
+	in = input;
+	p  = in->base;
+
+	do
+	  {
+	    /* For each sector. */
+	    usize_t   gp_best  = gbest[sector];
+	    uint    *gp_codes = evolve_code[gp_best];
+	    uint8_t *gp_clens = evolve_clen[gp_best];
+
+	    XD3_ASSERT (sector < gbest_no);
+
+	    sector += 1;
+
+	    /* Encode the sector data. */
+	    for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
+	      {
+		usize_t sym  = *p;
+		usize_t bits = gp_clens[sym];
+		usize_t code = gp_codes[sym];
+
+		IF_DEBUG (encode_bits -= bits);
+
+		if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code))) { goto failure; }
+
+		GP_PAGE ();
+	      }
+	  }
+	while (in != NULL);
+
+	XD3_ASSERT (select_bits == 0);
+	XD3_ASSERT (encode_bits == 0);
+
+#undef evolve_clen
+      }
+    }
+
+  ret = xd3_flush_bits (stream, & output, & bstate);
+
+  if (0)
+    {
+    nosecond:
+      stream->msg = "secondary compression was inefficient";
+      ret = XD3_NOSECOND;
+    }
+
+ failure:
+
+  xd3_free (stream, gbest);
+  xd3_free (stream, gbest_mtf);
+  return ret;
+}
+#endif /* XD3_ENCODER */
+
+/*********************************************************************/
+/*                              DECODE                               */
+/*********************************************************************/
+/* Build the canonical-Huffman decoding tables (INORDER, BASE, LIMIT, min/max length) for the code lengths in CLEN. */
+static void
+djw_build_decoder (xd3_stream    *stream,     /* not referenced directly in this function */
+		   usize_t         asize,      /* number of entries in clen[]; must be non-zero */
+		   usize_t         abs_max,    /* largest code length that may appear in clen[] */
+		   const uint8_t *clen,        /* code length per symbol, 0 = symbol has no code */
+		   uint8_t       *inorder,     /* out: coded symbols ordered by (code length, symbol value) */
+		   uint          *base,        /* out, per length: (code - base) is the index into inorder[] */
+		   uint          *limit,       /* out, per length: largest code of that length */
+		   uint          *min_clenp,   /* out: shortest non-zero length, 0 when there is none */
+		   uint          *max_clenp)   /* out: longest non-zero length, 0 when there is none */
+{
+  int i, l;
+  const uint8_t *ci;
+  uint nr_clen [DJW_MAX_CODELEN+1];  /* number of symbols having each code length */
+  uint tmp_base[DJW_MAX_CODELEN+1];  /* next free inorder[] slot for each code length */
+  int min_clen;
+  int max_clen;
+
+  /* Assumption: the two temporary arrays are large enough to hold abs_max. */
+  XD3_ASSERT (abs_max <= DJW_MAX_CODELEN);
+
+  /* This looks something like the start of zlib's inftrees.c */
+  memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1));
+
+  /* Count number of each code length */
+  i  = asize;
+  ci = clen;
+  do
+    {
+      /* Caller _must_ check that values are in-range.  Most of the time
+       * the caller decodes a specific number of bits, which imply the max value, and the
+       * other time the caller decodes a huffman value, which must be in-range.  Therefore,
+       * its an assertion and this function cannot otherwise fail. */
+      XD3_ASSERT (*ci <= abs_max);
+
+      nr_clen[*ci++]++;
+    }
+  while (--i != 0);
+
+  /* Compute min, max. */
+  for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } }
+  min_clen = i;
+  for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } }
+  max_clen = i;
+  if (max_clen == 0) { *min_clenp = 0; *max_clenp = 0; return; } /* all lengths zero: min_clen is abs_max+1, which would index past the tables; djw_decode_symbol rejects max_clen == 0 */
+  /* Fill the BASE, LIMIT table. */
+  tmp_base[min_clen] = 0;
+  base[min_clen]     = 0;
+  limit[min_clen]    = nr_clen[min_clen] - 1;
+  for (i = min_clen + 1; i <= max_clen; i += 1)
+    {
+      uint last_limit = ((limit[i-1] + 1) << 1); /* first code of length i: one past the previous limit, extended by a bit */
+      tmp_base[i] = tmp_base[i-1] + nr_clen[i-1];
+      limit[i]    = last_limit + nr_clen[i] - 1;
+      base[i]     = last_limit - tmp_base[i];
+    }
+
+  /* Fill the inorder array, canonically ordered codes. */
+  ci = clen;
+  for (i = 0; i < asize; i += 1)
+    {
+      if ((l = *ci++) != 0)
+	{
+	  inorder[tmp_base[l]++] = i;
+	}
+    }
+
+  *min_clenp = min_clen;
+  *max_clenp = max_clen;
+}
+/* Read one Huffman code bit by bit from the input and store the decoded symbol in *SYM; returns 0 or EINVAL. */
+static INLINE int
+djw_decode_symbol (xd3_stream     *stream,    /* ->msg receives the error text on failure */
+		   bit_state      *bstate,    /* current input byte and bit mask, updated in place */
+		   const uint8_t **input,     /* in/out: next unread input byte */
+		   const uint8_t  *input_end, /* one past the last readable input byte */
+		   const uint8_t  *inorder,   /* tables produced by djw_build_decoder */
+		   const uint     *base,
+		   const uint     *limit,
+		   uint            min_clen,
+		   uint            max_clen,
+		   usize_t         *sym,      /* out: decoded symbol (written only on success) */
+		   usize_t          max_sym)  /* exclusive upper bound on the index used in inorder[] */
+{
+  usize_t code = 0;
+  usize_t bits = 0;
+
+  /* OPT: Supposedly a small lookup table improves speed here... */
+
+  /* Code outline is similar to xd3_decode_bits... */
+  if (bstate->cur_mask == 0x100) { goto next_byte; }
+
+  for (;;)
+    {
+      do
+	{
+	  if (bits == max_clen) { goto corrupt; } /* no code is longer than max_clen (also rejects an empty table) */
+
+	  bits += 1;
+	  code  = (code << 1);
+
+	  if (bstate->cur_byte & bstate->cur_mask) { code |= 1; }
+
+	  IF_DEBUG1 (P(RINT "%u", (bstate->cur_byte & bstate->cur_mask) && 1));
+
+	  bstate->cur_mask <<= 1;
+
+	  if (bits >= min_clen && code <= limit[bits]) { goto done; }
+	}
+      while (bstate->cur_mask != 0x100);
+
+    next_byte:
+
+      if (*input == input_end)
+	{
+	  stream->msg = "secondary decoder end of input";
+	  return EINVAL;
+	}
+
+      bstate->cur_byte = *(*input)++;
+      bstate->cur_mask = 1;
+    }
+
+ done:
+
+  if (base[bits] <= code)
+    {
+      usize_t offset = code - base[bits];
+
+      if (offset < max_sym) /* strict: callers pass the table size, so offset == max_sym would read past inorder[] */
+	{
+	  IF_DEBUG1 (P(RINT " (%u) ", bits));
+	  *sym = inorder[offset];
+	  return 0;
+	}
+    }
+
+ corrupt:
+  stream->msg = "secondary decoder invalid code";
+  return EINVAL;
+}
+/* Read the lengths of the code-length codes from the input and build the decoder tables for them. */
+static int
+djw_decode_clclen (xd3_stream     *stream,
+		   bit_state      *bstate,
+		   const uint8_t **input,
+		   const uint8_t  *input_end,
+		   uint8_t        *cl_inorder,
+		   uint           *cl_base,
+		   uint           *cl_limit,
+		   uint           *cl_minlen,
+		   uint           *cl_maxlen,
+		   uint8_t        *cl_mtf)
+{
+  uint8_t lengths[DJW_TOTAL_CODES];
+  usize_t coded, bits;
+  int     idx = 0, err;
+
+  /* The stream carries the count of extra code lengths; DJW_EXTRA_12OFFSET is added to get the total. */
+  err = xd3_decode_bits (stream, bstate, input, input_end, DJW_EXTRA_CODE_BITS, & coded);
+  if (err != 0) { return err; }
+
+  coded += DJW_EXTRA_12OFFSET;
+
+  /* Each transmitted length occupies DJW_CLCLEN_BITS bits. */
+  while (idx < coded)
+    {
+      err = xd3_decode_bits (stream, bstate, input, input_end, DJW_CLCLEN_BITS, & bits);
+      if (err != 0) { return err; }
+
+      lengths[idx++] = bits;
+    }
+
+  /* Lengths that were not transmitted are zero. */
+  while (idx < DJW_TOTAL_CODES) { lengths[idx++] = 0; }
+
+  /* A DJW_CLCLEN_BITS-bit field cannot exceed DJW_MAX_CLCLEN, so no range check is needed: */
+  XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1);
+
+  /* Derive the decoder tables for the code-length alphabet. */
+  djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN,
+		     lengths, cl_inorder, cl_base, cl_limit, cl_minlen, cl_maxlen);
+
+  /* Start from a fresh MTF state for the code lengths that follow. */
+  djw_init_clen_mtf_1_2 (cl_mtf);
+  return 0;
+}
+
+static INLINE int
+djw_decode_1_2 (xd3_stream     *stream,
+		bit_state      *bstate,
+		const uint8_t **input,
+		const uint8_t  *input_end,
+		const uint8_t  *inorder,
+		const uint     *base,
+		const uint     *limit,
+		const uint     *minlen,
+		const uint     *maxlen,
+		uint8_t        *mtfvals,
+		usize_t          elts,
+		usize_t          skip_offset,
+		uint8_t        *values)
+{
+  usize_t n = 0, rep = 0, mtf = 0, s = 0;
+  int ret;
+  
+  while (n < elts)
+    {
+      /* Special case inside generic code: CLEN only: If not the first group, we already
+       * know the zero frequencies. */
+      if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0)
+	{
+	  values[n++] = 0;
+	  continue;
+	}
+
+      /* Repeat last symbol. */
+      if (rep != 0)
+	{
+	  values[n++] = mtfvals[0];
+	  rep -= 1;
+	  continue;
+	}
+
+      /* Symbol following last repeat code. */
+      if (mtf != 0)
+	{
+	  usize_t sym = djw_update_mtf (mtfvals, mtf);
+	  values[n++] = sym;
+	  mtf = 0;
+	  continue;
+	}
+
+      /* Decode next symbol/repeat code. */
+      if ((ret = djw_decode_symbol (stream, bstate, input, input_end,
+				    inorder, base, limit, *minlen, *maxlen,
+				    & mtf, DJW_TOTAL_CODES))) { return ret; }
+
+      if (mtf <= RUN_1)
+	{
+	  /* Repetition. */
+	  rep = ((mtf + 1) << s);
+	  mtf = 0;
+	  s += 1;
+	}
+      else
+	{
+	  /* Remove the RUN_1 MTF offset. */
+	  mtf -= 1;
+	  s = 0;
+	}
+    }
+
+  /* If (rep != 0) there were too many codes received. */
+  if (rep != 0)
+    {
+      stream->msg = "secondary decoder invalid repeat code";
+      return EINVAL;
+    }
+  
+  return 0;
+}
+/* Decode the code lengths of all GROUPS groups into CLEN, ALPHABET_SIZE entries per group, stored back to back. */
+static INLINE int
+djw_decode_prefix (xd3_stream     *stream,
+		   bit_state      *bstate,
+		   const uint8_t **input,
+		   const uint8_t  *input_end,
+		   const uint8_t  *cl_inorder,
+		   const uint     *cl_base,
+		   const uint     *cl_limit,
+		   const uint     *cl_minlen,
+		   const uint     *cl_maxlen,
+		   uint8_t        *cl_mtf,
+		   usize_t          groups,
+		   uint8_t        *clen)
+{
+  const usize_t total = ALPHABET_SIZE * groups; /* skip distance ALPHABET_SIZE = the same symbol one group back */
+  return djw_decode_1_2 (stream, bstate, input, input_end, cl_inorder, cl_base, cl_limit,
+			 cl_minlen, cl_maxlen, cl_mtf, total, ALPHABET_SIZE, clen);
+}
+/* Decode one Huffman-coded section from [*input_pos, input_end) so that it fills [*output_pos, output_end) exactly. */
+static int
+xd3_decode_huff (xd3_stream     *stream,            /* allocator context; ->msg receives error text */
+		 djw_stream    *h,                  /* not referenced in this function */
+		 const uint8_t **input_pos,         /* in/out: written back on every exit path */
+		 const uint8_t  *const input_end,
+		 uint8_t       **output_pos,        /* in/out: written back on every exit path */
+		 const uint8_t  *const output_end)  /* the decoded size is output_end - *output_pos */
+{
+  const uint8_t *input = *input_pos;
+  uint8_t  *output = *output_pos;
+  bit_state bstate = BIT_STATE_DECODE_INIT;
+  uint8_t  *sel_group = NULL;  /* group index per sector; allocated only when groups > 1 */
+  usize_t    groups, gp;
+  usize_t    output_bytes = (output_end - output);
+  usize_t    sector_size;
+  usize_t    sectors;
+  int ret;
+
+  /* Invalid input. */
+  if (output_bytes == 0)
+    {
+      stream->msg = "secondary decoder invalid input";
+      return EINVAL;
+    }
+
+  /* Decode: number of groups */
+  if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GROUP_BITS, & groups))) { goto fail; }
+
+  groups += 1; /* the encoder writes groups-1 */
+
+  if (groups > 1)
+    {
+      /* Decode: group size */
+      if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_SECTORSZ_BITS, & sector_size))) { goto fail; }
+      
+      sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT; /* the encoder writes (sector_size / DJW_SECTORSZ_MULT) - 1 */
+    }
+  else
+    {
+      /* Default for groups == 1 */
+      sector_size = output_bytes;
+    }
+
+  sectors = 1 + (output_bytes - 1) / sector_size; /* output_bytes / sector_size, rounded up */
+
+  /* @!@ In the case of groups==1, lots of extra stack space gets used here.  Could
+   * dynamically allocate this memory, which would help with excess parameter passing,
+   * too.  Passing too many parameters in this file, simplify it! */
+
+  /* Outer scope: per-group symbol decoder tables. */
+  {
+    uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE];
+    uint    base   [DJW_MAX_GROUPS][DJW_MAX_CODELEN+1];
+    uint    limit  [DJW_MAX_GROUPS][DJW_MAX_CODELEN+1];
+    uint    minlen [DJW_MAX_GROUPS];
+    uint    maxlen [DJW_MAX_GROUPS];
+
+    /* Nested scope: code length decoder tables. */
+    {
+      uint8_t clen      [DJW_MAX_GROUPS][ALPHABET_SIZE];
+      uint8_t cl_inorder[DJW_TOTAL_CODES];
+      uint    cl_base   [DJW_MAX_CLCLEN+1];
+      uint    cl_limit  [DJW_MAX_CLCLEN+1];
+      uint8_t cl_mtf    [DJW_TOTAL_CODES];
+      uint    cl_minlen;
+      uint    cl_maxlen;
+
+      /* Compute the code length decoder. */
+      if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end,
+				    cl_inorder, cl_base, cl_limit, & cl_minlen,
+				    & cl_maxlen, cl_mtf))) { goto fail; }
+
+      /* Now decode each group decoder. */
+      if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end,
+				    cl_inorder, cl_base, cl_limit,
+				    & cl_minlen, & cl_maxlen, cl_mtf,
+				    groups, clen[0]))) { goto fail; } /* clen is filled as one flat run of groups * ALPHABET_SIZE lengths */
+
+      /* Prepare the actual decoding tables. */
+      for (gp = 0; gp < groups; gp += 1)
+	{
+	  djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN,
+			     clen[gp], inorder[gp], base[gp], limit[gp],
+			     & minlen[gp], & maxlen[gp]);
+	}
+    }
+
+    /* Decode: selector clens. */
+    {
+      uint8_t sel_inorder[DJW_MAX_GROUPS+1];
+      uint    sel_base   [DJW_MAX_GBCLEN+1];
+      uint    sel_limit  [DJW_MAX_GBCLEN+1];
+      uint8_t sel_mtf    [DJW_MAX_GROUPS+1];
+      uint    sel_minlen;
+      uint    sel_maxlen;
+
+      /* Setup group selection. */
+      if (groups > 1)
+	{
+	  uint8_t sel_clen[DJW_MAX_GROUPS+1];
+
+	  for (gp = 0; gp < groups+1; gp += 1) /* groups+1 selector code lengths are stored */
+	    {
+	      usize_t value;
+
+	      if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GBCLEN_BITS, & value))) { goto fail; }
+
+	      sel_clen[gp] = value;
+	      sel_mtf[gp]  = gp; /* the MTF list starts out in identity order */
+	    }
+
+	  if ((sel_group = xd3_alloc (stream, sectors, 1)) == NULL) { ret = ENOMEM; goto fail; }
+
+	  djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen,
+			     sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen);
+
+	  if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end,
+				     sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen, sel_mtf,
+				     sectors, 0, sel_group))) { goto fail; } /* skip_offset 0: no zero-skipping for selectors */
+	}
+
+      /* Now decode each sector. */
+      {
+	uint8_t *gp_inorder = inorder[0]; /* Initialize for (groups==1) case. */
+	uint    *gp_base    = base[0];
+	uint    *gp_limit   = limit[0];
+	uint     gp_minlen  = minlen[0];
+	uint     gp_maxlen  = maxlen[0];
+	usize_t c;
+
+	for (c = 0; c < sectors; c += 1)
+	  {
+	    usize_t n;
+
+	    if (groups >= 2)
+	      {
+		gp = sel_group[c]; /* switch to the tables of the group chosen for this sector */
+
+		XD3_ASSERT (gp < groups);
+
+		gp_inorder = inorder[gp];
+		gp_base    = base[gp];
+		gp_limit   = limit[gp];
+		gp_minlen  = minlen[gp];
+		gp_maxlen  = maxlen[gp];
+	      }
+
+	    XD3_ASSERT (output_end - output > 0);
+	    
+	    /* Decode next sector. */
+	    n = min (sector_size, (usize_t) (output_end - output)); /* the last sector may be short */
+
+	    do
+	      {
+		usize_t sym;
+
+		if ((ret = djw_decode_symbol (stream, & bstate, & input, input_end,
+					      gp_inorder, gp_base, gp_limit, gp_minlen, gp_maxlen,
+					      & sym, ALPHABET_SIZE))) { goto fail; }
+
+		*output++ = sym;
+	      }
+	    while (--n);
+	  }
+      }
+    }
+  }
+
+  IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate))) { goto fail; });
+  XD3_ASSERT (ret == 0);
+
+ fail: /* common exit, also reached on success; sel_group is still NULL when groups == 1 */
+  xd3_free (stream, sel_group);
+
+  (*input_pos) = input;
+  (*output_pos) = output;
+  return ret;
+}
+
+/*********************************************************************/
+/*                              TUNING                               */
+/*********************************************************************/
+
+#if TUNE_HUFFMAN && XD3_ENCODER
+#include <stdio.h>
+#include "xdelta3-fgk.h"
+/* Count the bits written so far: 8 per byte held in OUTPUT plus the bit position indicated by cur_mask. */
+static uint
+xd3_bitsof_output (xd3_output *output, bit_state *bstate)
+{
+  uint mask;
+  uint partial = 0;
+
+  /* Shifts needed to bring cur_mask down to 1 (assumes a single set bit -- 0 would never terminate). */
+  for (mask = bstate->cur_mask; mask != 1; mask >>= 1)
+    {
+      partial += 1;
+    }
+
+  return partial + 8 * xd3_sizeof_output (output);
+}
+/* Return the short tag printed in the tuning report for a section type; any other value aborts. */
+static const char* xd3_sect_type (xd3_section_type type)
+{
+  if (type == DATA_SECTION) { return "DATA"; }
+  if (type == INST_SECTION) { return "INST"; }
+  if (type == ADDR_SECTION) { return "ADDR"; }
+
+  /* Only the three section kinds above have a tag;
+   * reaching this point is treated as a fatal error. */
+  abort ();
+}
+/* Tuning driver: encodes INPUT with many group/sector settings and prints size statistics; always returns 0. */
+static int
+xd3_encode_huff (xd3_stream   *stream,
+		 djw_stream  *h,
+		 xd3_output   *input,          /* data to compress; only read here */
+		 xd3_output   *unused_output,  /* never written: every trial uses its own scratch output */
+		 xd3_sec_cfg  *cfg)            /* ngroups and sector_size are overwritten by the sweep */
+{
+  int ret = 0;
+  int input_size = xd3_sizeof_output (input);
+  static int hdr = 0; /* the column header is printed once per process */
+  const char *sect_type = xd3_sect_type (cfg->data_type);
+  xd3_output *output;
+  usize_t output_size;
+
+  if (hdr == 0) { hdr = 1; P(RINT "____ SECT INSZ SECTORSZ GPNO OUTSZ PREFIX SELECT ENCODE\n"); }
+
+  P(RINT "SECTION %s %u\n", sect_type, input_size);
+
+    {
+      int gp, i;
+      int best_size = 99999999;
+      usize_t best_prefix = 0, best_select = 0, best_encode = 0, best_sector_size = 0;
+      int best_gpno = -1; /* stays -1 until some trial has produced output */
+      const char *t12 = "12";
+      usize_t clen_count[DJW_MAX_CODELEN+1];
+      djw_weight best_freq[DJW_TOTAL_CODES];
+
+      for (cfg->ngroups = 1; cfg->ngroups <= /*1*/ DJW_MAX_GROUPS; cfg->ngroups += 1)
+	{
+	  for (cfg->sector_size = 10; cfg->sector_size <= DJW_SECTORSZ_MAX; cfg->sector_size += 10)
+	    {
+	      output = xd3_alloc_output (stream, NULL);
+
+	      if ((ret = xd3_real_encode_huff (stream, h, input, output, cfg))) { goto fail; } /* jumps into the else branch below */
+
+	      output_size = xd3_sizeof_output (output);
+
+	      if (output_size < best_size)
+		{
+		  best_size = output_size;
+		  best_gpno = cfg->ngroups;
+		  best_prefix = tune_prefix_bits;
+		  best_select = tune_select_bits;
+		  best_encode = tune_encode_bits;
+		  best_sector_size = cfg->sector_size;
+		  memset (clen_count, 0, sizeof (clen_count));
+
+		  for (gp = 0; gp < cfg->ngroups; gp += 1)
+		    {
+		      for (i = 0; i < ALPHABET_SIZE; i += 1)
+			{
+			  clen_count[tune_clen[gp][i]] += 1;
+			}
+		    }
+
+		  memcpy (best_freq, tune_freq, sizeof (tune_freq));
+
+		  XD3_ASSERT (sizeof (tune_freq) == sizeof (mtf_freq));
+		}
+
+	      if (1)
+		{
+		  P(RINT "COMP%s %u %u %u %u %u %u\n",
+			   t12, cfg->ngroups, cfg->sector_size,
+			   output_size, tune_prefix_bits, tune_select_bits, tune_encode_bits);
+		}
+	      else
+		{
+		fail: /* reached only by the goto above: report the uncompressed size with zeroed bit counts */
+		  P(RINT "COMP%s %u %u %u %u %u %u\n",
+			   t12, cfg->ngroups, cfg->sector_size,
+			   input_size, 0, 0, 0);
+		}
+
+	      xd3_free_output (stream, output);
+
+	      XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND);
+
+	      if (cfg->ngroups == 1) { break; } /* a single trial is enough for one group: only one sector_size is tried */
+	    }
+	}
+
+      if (best_gpno > 0)
+	{
+	  P(RINT "BEST%s %u %u %u %u %u %u\n",
+		   t12, best_gpno, best_sector_size,
+		   best_size, best_prefix, best_select, best_encode);
+
+#if 0
+	  P(RINT "CLEN%s ", t12);
+	  for (i = 1; i <= DJW_MAX_CODELEN; i += 1)
+	    {
+	      P(RINT "%u ", clen_count[i]);
+	    }
+	  P(RINT "\n");
+
+	  P(RINT "FREQ%s ", t12);
+	  for (i = 0; i < DJW_TOTAL_CODES; i += 1)
+	    {
+	      P(RINT "%u ", tune_freq[i]);
+	    }
+	  P(RINT "\n");
+#endif
+	}
+    }
+
+  /* Compare to split single-table windows. */
+  {
+    int parts, i;
+
+    cfg->ngroups = 1; /* NOTE: cfg is left with ngroups == 1 when this function returns */
+
+    for (parts = 2; parts <= DJW_MAX_GROUPS; parts += 1)
+      {
+	usize_t part_size = input_size / parts;
+	xd3_output *inp = input, *partin, *partin_head;
+	usize_t      off = 0;          /* read offset inside the current input page */
+	usize_t      part_total = 0;   /* summed encoded size of the parts */
+	
+	if (part_size < 1000) { break; } 
+
+	for (i = 0; i < parts; i += 1)
+	  {
+	    usize_t inc;
+
+	    partin = partin_head = xd3_alloc_output (stream, NULL);
+	    output = xd3_alloc_output (stream, NULL);
+
+	    for (inc = 0; ((i < parts-1) && inc < part_size) ||
+		   ((i == parts-1) && inp != NULL); ) /* the last part takes all remaining input */
+	      {
+		usize_t take;
+
+		if (i < parts-1)
+		  {
+		    take = min (part_size - inc, inp->next - off);
+		  }
+		else
+		  {
+		    take = inp->next - off;
+		  }
+
+		ret = xd3_emit_bytes (stream, & partin, inp->base + off, take); /* NOTE(review): this result is overwritten below without being checked */
+
+		off += take;
+		inc += take;
+
+		if (off == inp->next)
+		  {
+		    inp = inp->next_page;
+		    off = 0;
+		  }
+	      }
+
+	    ret = xd3_real_encode_huff (stream, h, partin_head, output, cfg);
+
+	    part_total += xd3_sizeof_output (output);
+
+	    xd3_free_output (stream, partin_head);
+	    xd3_free_output (stream, output);
+
+	    XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND);
+
+	    if (ret == XD3_NOSECOND)
+	      {
+		break;
+	      }
+	  }
+
+	if (ret != XD3_NOSECOND)
+	  {
+	    P(RINT "PART %u %u\n", parts, part_total);
+	  }
+      }
+  }
+
+  /* Compare to FGK */
+  {
+    fgk_stream *fgk = fgk_alloc (stream);
+    
+    fgk_init (fgk);
+    
+    output = xd3_alloc_output (stream, NULL);
+    
+    ret = xd3_encode_fgk (stream, fgk, input, output, NULL);
+    
+    output_size = xd3_sizeof_output (output);
+    xd3_free_output (stream, output);
+    fgk_destroy (stream, fgk);
+
+    XD3_ASSERT (ret == 0);
+    
+    P(RINT "FGK %u\n", output_size);
+  }
+
+  P(RINT "END_SECTION %s %u\n", sect_type, input_size);
+
+  return 0;
+}
+#endif
+
+#endif
diff --git a/xdelta3/xdelta3-fgk.h b/xdelta3/xdelta3-fgk.h
new file mode 100755
index 0000000..a19d65c
--- /dev/null
+++ b/xdelta3/xdelta3-fgk.h
@@ -0,0 +1,851 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2002 and onward.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* For demonstration purposes only.
+ */
+
+#ifndef _XDELTA3_FGK_h_
+#define _XDELTA3_FGK_h_
+
+/* An implementation of the FGK algorithm described by D.E. Knuth in "Dynamic Huffman
+ * Coding" in Journal of Algorithms 6. */
+
+/* A 32bit counter (fgk_weight) is used as the frequency counter for nodes in the huffman
+ * tree.  @!@ Need to test for overflow and/or reset stats. */
+
+typedef struct _fgk_stream fgk_stream;
+typedef struct _fgk_node   fgk_node;
+typedef struct _fgk_block  fgk_block;
+typedef unsigned int       fgk_bit;
+typedef uint32_t           fgk_weight;
+
+struct _fgk_block {
+  union {
+    fgk_node  *un_leader;   /* block in use: the node leading it (accessed as block_leader) */
+    fgk_block *un_freeptr;  /* block unused: next block on the free list (block_freeptr) */
+  } un;
+};
+
+#define block_leader  un.un_leader
+#define block_freeptr un.un_freeptr
+
+/* The code can also support fixed huffman encoding/decoding. */
+#define IS_ADAPTIVE 1
+
+/* weight is a count of the number of times this element has been seen in the current
+ * encoding/decoding.  parent, right_child, and left_child are pointers defining the tree
+ * structure.  right and left point to neighbors in an ordered sequence of weights.  The
+ * left child of a node is always guaranteed to have weight not greater than its sibling.
+ * my_block->block_leader points to the element with the same weight as this node which
+ * is closest to the next increasing weight block.  */
+struct _fgk_node
+{
+  fgk_weight  weight;
+  fgk_node   *parent;
+  fgk_node   *left_child;   /* for an element still on the zero list: previous zero */
+  fgk_node   *right_child;  /* for an element still on the zero list: next zero */
+  fgk_node   *left;         /* neighbor toward lower weights */
+  fgk_node   *right;        /* neighbor toward higher weights */
+  fgk_block  *my_block;     /* block shared with the other nodes of equal weight */
+};
+
+/* alphabet_size is a count of the number of possible leaves in the huffman tree.  The
+ * number of total nodes counting internal nodes is ((2 * alphabet_size) - 1).
+ * zero_freq_count is the number of elements remaining which have zero frequency.
+ * zero_freq_exp and zero_freq_rem satisfy the equation zero_freq_count = 2^zero_freq_exp +
+ * zero_freq_rem.  root_node is the root of the tree, which is initialized to a node with
+ * zero frequency and contains the 0th such element.  free_node contains a pointer to the
+ * next available fgk_node space.  alphabet contains all the elements and is indexed by N.
+ * remaining_zeros points to the head of the list of zeros.  */
+struct _fgk_stream
+{
+  int alphabet_size;
+  int zero_freq_count;
+  int zero_freq_exp;
+  int zero_freq_rem;
+  int coded_depth;          /* number of bits currently held in coded_bits */
+
+  int total_nodes;          /* (2 * alphabet_size) - 1, the length of alphabet[] */
+  int total_blocks;         /* 2 * total_nodes, the length of block_array[] */
+
+  fgk_bit *coded_bits;      /* alphabet_size entries: bits of the code being built or read */
+
+  fgk_block *block_array;
+  fgk_block *free_block;    /* head of the list of unused blocks */
+
+  fgk_node *decode_ptr;     /* decoder's position in the tree; reset to root_node per symbol */
+  fgk_node *remaining_zeros;
+  fgk_node *alphabet;
+  fgk_node *root_node;
+  fgk_node *free_node;
+};
+
+/*********************************************************************/
+/*                             Encoder                               */
+/*********************************************************************/
+
+static fgk_stream*     fgk_alloc           (xd3_stream *stream /*, int alphabet_size */);
+static void            fgk_init            (fgk_stream *h);
+static int             fgk_encode_data     (fgk_stream *h,
+					    int         n);
+static INLINE fgk_bit  fgk_get_encoded_bit (fgk_stream *h);
+
+static int             xd3_encode_fgk      (xd3_stream  *stream,
+					    fgk_stream  *sec_stream,
+					    xd3_output  *input,
+					    xd3_output  *output,
+					    xd3_sec_cfg *cfg);
+
+/*********************************************************************/
+/* 			       Decoder                               */
+/*********************************************************************/
+
+static INLINE int      fgk_decode_bit      (fgk_stream *h,
+					    fgk_bit     b);
+static int             fgk_decode_data     (fgk_stream *h);
+static void            fgk_destroy         (xd3_stream *stream,
+					    fgk_stream *h);
+
+static int             xd3_decode_fgk      (xd3_stream     *stream,
+					    fgk_stream     *sec_stream,
+					    const uint8_t **input,
+					    const uint8_t  *const input_end,
+					    uint8_t       **output,
+					    const uint8_t  *const output_end);
+
+/*********************************************************************/
+/* 			       Private                               */
+/*********************************************************************/
+
+static unsigned int fgk_find_nth_zero        (fgk_stream *h, int n);
+static int          fgk_nth_zero             (fgk_stream *h, int n);
+static void         fgk_update_tree          (fgk_stream *h, int n);
+static fgk_node*    fgk_increase_zero_weight (fgk_stream *h, int n);
+static void         fgk_eliminate_zero       (fgk_stream* h, fgk_node *node);
+static void         fgk_move_right           (fgk_stream *h, fgk_node *node);
+static void         fgk_promote              (fgk_stream *h, fgk_node *node);
+static void         fgk_init_node            (fgk_node *node, int i, int size);
+static fgk_block*   fgk_make_block           (fgk_stream *h, fgk_node *l);
+static void         fgk_free_block           (fgk_stream *h, fgk_block *b);
+static void         fgk_factor_remaining     (fgk_stream *h);
+static INLINE void  fgk_swap_ptrs            (fgk_node **one, fgk_node **two);
+
+/*********************************************************************/
+/* 			    Basic Routines                           */
+/*********************************************************************/
+
+/* Allocates an FGK coder for the fixed ALPHABET_SIZE alphabet, together with its node,
+ * block and bit arrays.  Returns NULL if enough memory cannot be allocated. */
+static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size0 */)
+{
+  const int nsyms = ALPHABET_SIZE;
+  fgk_stream *coder = (fgk_stream*) xd3_alloc (stream, 1, sizeof (fgk_stream));
+
+  if (coder == NULL)
+    {
+      return NULL;
+    }
+
+  /* One leaf per symbol plus the internal nodes; twice as many blocks as nodes. */
+  coder->total_nodes  = (2 * nsyms) - 1;
+  coder->total_blocks = (2 * coder->total_nodes);
+
+  coder->alphabet    = (fgk_node*)  xd3_alloc (stream, coder->total_nodes,  sizeof (fgk_node));
+  coder->block_array = (fgk_block*) xd3_alloc (stream, coder->total_blocks, sizeof (fgk_block));
+  coder->coded_bits  = (fgk_bit*)   xd3_alloc (stream, nsyms,               sizeof (fgk_bit));
+
+  if (! (coder->alphabet && coder->block_array && coder->coded_bits))
+    {
+      /* fgk_destroy frees the three arrays and then the coder itself. */
+      fgk_destroy (stream, coder);
+      return NULL;
+    }
+
+  coder->alphabet_size = nsyms;
+  return coder;
+}
+
+static void fgk_init (fgk_stream *h)
+{
+  int k;
+  fgk_node *const first = h->alphabet;
+
+  /* Reset to the initial state: the tree is the single zero-weight node alphabet[0]. */
+  h->root_node = h->decode_ptr = h->remaining_zeros = first;
+  h->free_node   = first + h->alphabet_size;
+  h->coded_depth = 0;
+
+  /* Each fgk_factor_remaining call decrements zero_freq_count, so starting two too
+   * high ends with zero_freq_count == alphabet_size and exp/rem factored to match. */
+  h->zero_freq_count = h->alphabet_size + 2;
+  fgk_factor_remaining (h);
+  fgk_factor_remaining (h);
+
+  IF_DEBUG (memset (h->alphabet, 0, sizeof (h->alphabet[0]) * h->total_nodes));
+
+  /* Chain every block onto the free list; the last block terminates it. */
+  h->free_block = h->block_array;
+  for (k = 0; k + 1 < h->total_blocks; k += 1)
+    {
+      h->block_array[k].block_freeptr = h->block_array + (k + 1);
+    }
+
+  h->block_array[h->total_blocks - 1].block_freeptr = NULL;
+
+  /* The first alphabet_size nodes form the list of zero frequency elements. */
+  for (k = 0; k < h->alphabet_size; k += 1)
+    {
+      fgk_init_node (first + k, k, h->alphabet_size);
+    }
+}
+
+static void fgk_swap_ptrs(fgk_node **one, fgk_node **two)
+{
+  fgk_node *const held = *two;   /* exchange the two node pointers */
+  *two = *one;
+  *one = held;
+}
+
+/* Takes huffman transmitter h and n, the nth elt in the alphabet, builds the code for
+ * n in h->coded_bits, updates the tree, and returns the number of bits required to
+ * encode n.  The bits are then fetched one at a time with fgk_get_encoded_bit. */
+static int fgk_encode_data (fgk_stream* h, int n)
+{
+  fgk_node *walk = h->alphabet + n;
+
+  XD3_ASSERT (n < h->alphabet_size);
+
+  h->coded_depth = 0;
+
+  /* coded_bits is filled back to front, because fgk_get_encoded_bit hands bits out
+   * starting from the last one stored.  An element with zero frequency is therefore
+   * written first as its index among the remaining zero frequency elements, least
+   * significant bit first, ahead of the tree path of the zero node.  No index bits
+   * are necessary when there is only one zero left. */
+  if (IS_ADAPTIVE && walk->weight == 0)
+    {
+      /* index: position of n in the zero list; mask: the index bit being stored. */
+      const unsigned int index = fgk_find_nth_zero (h, n);
+      unsigned int mask;
+      int width = h->zero_freq_exp;
+
+      /* One more bit unless zero_freq_count is an exact power of two. */
+      if (h->zero_freq_rem != 0)
+	{
+	  width += 1;
+	}
+
+      for (mask = 1; width > 0; width -= 1, mask <<= 1)
+	{
+	  h->coded_bits[h->coded_depth] = (index & mask) != 0;
+
+	  h->coded_depth += 1;
+	}
+
+      /* The path that gets transmitted is that of the head of the zero list. */
+      walk = h->remaining_zeros;
+    }
+
+  /* Climb from the node to the root, storing 1 whenever the node is the right
+   * child of its parent and 0 otherwise.  Fetched in reverse, the bits describe
+   * the path from the root down. */
+  for (; walk != h->root_node; walk = walk->parent)
+    {
+      h->coded_bits[h->coded_depth] = (walk->parent->right_child == walk);
+
+      h->coded_depth += 1;
+    }
+
+  /* The code was built against the tree as it stood; only now is this occurrence
+   * of n counted. */
+  if (IS_ADAPTIVE)
+    {
+      fgk_update_tree (h, n);
+    }
+
+  return h->coded_depth;
+}
+
+/* Hands out the next bit of the code built by the last fgk_encode_data call.  Should
+ * be called exactly as many times as that call returned. */
+static INLINE fgk_bit fgk_get_encoded_bit (fgk_stream *h)
+{
+  XD3_ASSERT (h->coded_depth > 0);
+  h->coded_depth -= 1;
+  return h->coded_bits[h->coded_depth];
+}
+
+/* Accounts for one more occurrence of alphabet[n] after it has been encoded or
+ * decoded: every node from its leaf up to the root gains one in weight.
+ */
+static void fgk_update_tree (fgk_stream *h, int n)
+{
+  fgk_node *cur = h->alphabet + n;
+
+  /* A first occurrence has to leave the zero list and enter the tree. */
+  if (cur->weight == 0)
+    {
+      cur = fgk_increase_zero_weight (h, n);
+    }
+
+  /* Each node is first made the leader of its block (fgk_move_right), then shifted
+   * into the next block (fgk_promote), and only then incremented. */
+  for (; cur != h->root_node; cur = cur->parent)
+    {
+      fgk_move_right (h, cur);
+      fgk_promote    (h, cur);
+
+      cur->weight += 1;
+    }
+
+  /* The root is counted as well, but is never moved. */
+  h->root_node->weight += 1;
+}
+
+static void fgk_move_right (fgk_stream *h, fgk_node *move_fwd)
+{
+  fgk_node **fwd_par_ptr, **back_par_ptr;
+  fgk_node *move_back, *tmp;
+  /* Exchanges move_fwd with the leader of its block, in both the list and the tree. */
+  move_back = move_fwd->my_block->block_leader;
+
+  if (move_fwd         == move_back ||   /* already the leader */
+      move_fwd->parent == move_back ||   /* the leader is its own parent: no exchange */
+      move_fwd->weight == 0)
+    {
+      return;
+    }
+
+  move_back->right->left = move_fwd;   /* the leader's right neighbor now follows move_fwd */
+
+  if (move_fwd->left)
+    {
+      move_fwd->left->right = move_back;   /* move_fwd's left neighbor now precedes the leader */
+    }
+
+  tmp = move_fwd->right;               /* old right neighbor of move_fwd */
+  move_fwd->right = move_back->right;
+
+  if (tmp == move_back)
+    {
+      move_back->right = move_fwd;     /* the two were adjacent: they now point at each other */
+    }
+  else
+    {
+      tmp->left = move_back;
+      move_back->right = tmp;
+    }
+
+  tmp = move_back->left;               /* old left neighbor of the leader */
+  move_back->left = move_fwd->left;
+
+  if (tmp == move_fwd)
+    {
+      move_fwd->left = move_back;      /* adjacent case again */
+    }
+  else
+    {
+      tmp->right = move_fwd;
+      move_fwd->left = tmp;
+    }
+
+  if (move_fwd->parent->right_child == move_fwd)   /* find the child slot holding move_fwd */
+    {
+      fwd_par_ptr = &move_fwd->parent->right_child;
+    }
+  else
+    {
+      fwd_par_ptr = &move_fwd->parent->left_child;
+    }
+
+  if (move_back->parent->right_child == move_back) /* find the child slot holding the leader */
+    {
+      back_par_ptr = &move_back->parent->right_child;
+    }
+  else
+    {
+      back_par_ptr = &move_back->parent->left_child;
+    }
+
+  fgk_swap_ptrs (&move_fwd->parent, &move_back->parent);   /* exchange the parents ... */
+  fgk_swap_ptrs (fwd_par_ptr, back_par_ptr);               /* ... and the slots pointing down */
+
+  move_fwd->my_block->block_leader = move_fwd;   /* move_fwd now leads the block */
+}
+
+/* Shifts node, the leader of its block, into the next block; its weight is incremented by the caller afterwards. */
+static void fgk_promote (fgk_stream *h, fgk_node *node)
+{
+  fgk_node *my_left, *my_right;
+  fgk_block *cur_block;
+
+  my_right  = node->right;
+  my_left   = node->left;
+  cur_block = node->my_block;
+
+  if (node->weight == 0)   /* zero-weight nodes keep their block */
+    {
+      return;
+    }
+
+  /* if left is right child, parent of remaining zeros case (?), means parent
+   * has same weight as right child. */
+  if (my_left == node->right_child &&
+      node->left_child &&
+      node->left_child->weight == 0)
+    {
+      XD3_ASSERT (node->left_child == h->remaining_zeros);
+      XD3_ASSERT (node->right_child->weight == (node->weight+1)); /* child weight was already incremented */
+      
+      if (node->weight == (my_right->weight - 1) && my_right != h->root_node)
+	{
+	  fgk_free_block (h, cur_block);             /* node and its right child ... */
+	  node->my_block    = my_right->my_block;    /* ... both join the block of the */
+	  my_left->my_block = my_right->my_block;    /* right neighbor */
+	}
+
+      return;
+    }
+
+  if (my_left == h->remaining_zeros) /* NOTE(review): also matches left == NULL once no zeros remain -- confirm */
+    {
+      return;
+    }
+
+  /* true if node is not the leftmost node of its block: the left neighbor takes over as leader */
+  if (my_left->my_block == cur_block)
+    {
+      my_left->my_block->block_leader = my_left;
+    }
+  else
+    {
+      fgk_free_block (h, cur_block);   /* node was the only member, so the block is released */
+    }
+
+  /* node->parent != my_right; join the right neighbor's block when its weight is one more than node's */
+  if ((node->weight == (my_right->weight - 1)) && (my_right != h->root_node))
+    {
+      node->my_block = my_right->my_block;
+    }
+  else
+    {
+      node->my_block = fgk_make_block (h, node);   /* otherwise node leads a block of its own */
+    }
+}
+
+/* When an element is seen the first time this is called to remove it from the list of
+ * zero weight elements and introduce a new internal node to the tree.  Returns alphabet + n. */
+static fgk_node* fgk_increase_zero_weight (fgk_stream *h, int n)
+{
+  fgk_node *this_zero, *new_internal, *zero_ptr;
+
+  this_zero = h->alphabet + n;
+
+  if (h->zero_freq_count == 1)
+    {
+      /* this is the last one */
+      this_zero->right_child = NULL;
+
+      if (this_zero->right->weight == 1)   /* share the right neighbor's block if it has weight 1 */
+	{
+	  this_zero->my_block = this_zero->right->my_block;
+	}
+      else
+	{
+	  this_zero->my_block = fgk_make_block (h, this_zero);
+	}
+
+      h->remaining_zeros = NULL;   /* the zero list is now empty; no internal node is added */
+
+      return this_zero;
+    }
+
+  zero_ptr = h->remaining_zeros;   /* the zero node currently placed in the tree */
+
+  new_internal = h->free_node++;   /* take the next unused node slot */
+
+  new_internal->parent      = zero_ptr->parent;   /* new_internal takes over the zero node's place */
+  new_internal->right       = zero_ptr->right;
+  new_internal->weight      = 0;
+  new_internal->right_child = this_zero;
+  new_internal->left        = this_zero;
+
+  if (h->remaining_zeros == h->root_node)
+    {
+      /* This is the first element to be coded */
+      h->root_node           = new_internal;
+      this_zero->my_block    = fgk_make_block (h, this_zero);
+      new_internal->my_block = fgk_make_block (h, new_internal);
+    }
+  else
+    {
+      new_internal->right->left = new_internal;   /* link into the ordered list */
+
+      if (zero_ptr->parent->right_child == zero_ptr)   /* and into the parent's child slot */
+	{
+	  zero_ptr->parent->right_child = new_internal;
+	}
+      else
+	{
+	  zero_ptr->parent->left_child = new_internal;
+	}
+
+      if (new_internal->right->weight == 1)   /* share the right neighbor's block if it has weight 1 */
+	{
+	  new_internal->my_block = new_internal->right->my_block;
+	}
+      else
+	{
+	  new_internal->my_block = fgk_make_block (h, new_internal);
+	}
+
+      this_zero->my_block = new_internal->my_block;
+    }
+
+  fgk_eliminate_zero (h, this_zero);   /* may advance h->remaining_zeros to the next zero */
+
+  new_internal->left_child = h->remaining_zeros;   /* zero node left, new leaf right */
+
+  this_zero->right       = new_internal;   /* list order: remaining_zeros, this_zero, new_internal */
+  this_zero->left        = h->remaining_zeros;
+  this_zero->parent      = new_internal;
+  this_zero->left_child  = NULL;           /* no longer zero-list links: this_zero is a tree leaf */
+  this_zero->right_child = NULL;
+
+  h->remaining_zeros->parent = new_internal;
+  h->remaining_zeros->right  = this_zero;
+
+  return this_zero;
+}
+
+/* When a zero frequency element is encoded, it is followed by the binary representation
+ * of its index into the remaining zero frequency elements.  Walks that list from its
+ * head along right_child links and returns the index at which alphabet[n] is found.  */
+static unsigned int fgk_find_nth_zero (fgk_stream* h, int n)
+{
+  const fgk_node *const wanted = h->alphabet + n;
+  const fgk_node *scan;
+  unsigned int pos = 0;
+
+  /* alphabet[n] must be on the list; the walk has no end-of-list test. */
+  for (scan = h->remaining_zeros; scan != wanted; scan = scan->right_child)
+    {
+      pos += 1;
+    }
+
+  return pos;
+}
+
+/* Splices node out of the list of zeros (linked through left_child/right_child). */
+static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node)
+{
+  fgk_node *before, *after;
+
+  if (h->zero_freq_count == 1) { return; }
+
+  fgk_factor_remaining (h);   /* one zero fewer */
+
+  before = node->left_child;
+  after  = node->right_child;
+
+  if (before == NULL)
+    {
+      /* No predecessor: the list head moves on to the next zero. */
+      h->remaining_zeros = h->remaining_zeros->right_child;
+      h->remaining_zeros->left_child = NULL;
+      return;
+    }
+
+  /* Bypass node; after is NULL when node was the last in the list. */
+  before->right_child = after;
+
+  if (after != NULL) { after->left_child = before; }
+}
+
+/* Resets node, which is element i of an array of the given size, to weight zero with
+ * no tree, neighbor or block links.  left_child and right_child chain it to elements
+ * i-1 and i+1 of the array, and are NULL at either end of the chain. */
+static void fgk_init_node (fgk_node *node, int i, int size)
+{
+  /* Not part of the tree, the ordered list, or any block. */
+  node->weight   = 0;
+  node->parent   = NULL;
+  node->left     = NULL;
+  node->right    = NULL;
+  node->my_block = NULL;
+
+  /* Start out as both ends of the chain, then attach whichever side exists. */
+  node->left_child  = NULL;
+  node->right_child = NULL;
+
+  if (i >= 1)
+    {
+      node->left_child = node - 1;
+    }
+
+  if (i < size - 1)
+    {
+      node->right_child = node + 1;
+    }
+}
+
+/* Blocks live in one array.  A block that is in use holds a pointer to the node leading
+ * it; an unused block holds a link to the next unused block, the first of which is
+ * h->free_block.  This takes the first unused block and makes lead its leader.  */
+static fgk_block* fgk_make_block (fgk_stream *h, fgk_node* lead)
+{
+  fgk_block *const taken = h->free_block;
+
+  XD3_ASSERT (taken != NULL);
+
+  /* Read the free-list link before block_leader overwrites it: they share storage. */
+  h->free_block = taken->block_freeptr;
+  taken->block_leader = lead;
+
+  return taken;
+}
+
+/* Restores the block to the front of the free list. */
+static void fgk_free_block (fgk_stream *h, fgk_block *b)
+{
+  b->block_freeptr = h->free_block;   /* b links to the old head; this overwrites block_leader (same union) */
+  h->free_block = b;                  /* b becomes the new head */
+}
+
+/* Decrements zero_freq_count, then sets zero_freq_exp and zero_freq_rem to satisfy
+ * zero_freq_count == 2^zero_freq_exp + zero_freq_rem for the new count.  */
+static void fgk_factor_remaining (fgk_stream *h)
+{
+  unsigned int left;
+  int exp = 0;
+
+  h->zero_freq_count -= 1;
+
+  /* exp counts how often the count can be halved before it drops to one or less. */
+  for (left = h->zero_freq_count; left > 1; left >>= 1)
+    {
+      exp += 1;
+    }
+
+  /* Whatever exceeds that power of two is the remainder. */
+  h->zero_freq_exp = exp;
+  h->zero_freq_rem = h->zero_freq_count - (1U << exp);
+}
+
+/* Feeds one received bit to the decoder.  Returns true once a complete code has been
+ * received, at which point fgk_decode_data yields the element; returns false while
+ * more bits are required.
+ */
+static int INLINE fgk_decode_bit (fgk_stream* h, fgk_bit b)
+{
+  fgk_node *next;
+
+  XD3_ASSERT (b == 1 || b == 0);
+
+  /* Positioned on a zero-weight node: this bit belongs to the index that selects one
+   * of the remaining zero frequency elements.  It is buffered until all of the index
+   * bits have arrived. */
+  if (IS_ADAPTIVE && h->decode_ptr->weight == 0)
+    {
+      /* The index takes zero_freq_exp bits when zero_freq_rem is zero, and one more
+       * bit otherwise. */
+      const int bitsreq = h->zero_freq_exp + (h->zero_freq_rem != 0);
+
+      h->coded_bits[h->coded_depth++] = b;
+
+      return h->coded_depth >= bitsreq;
+    }
+
+  /* Otherwise the bit selects a child: 1 goes right, 0 goes left. */
+  if (b)
+    {
+      next = h->decode_ptr->right_child;
+    }
+  else
+    {
+      next = h->decode_ptr->left_child;
+    }
+
+  h->decode_ptr = next;
+
+  /* A left child is present: keep descending, more bits are required. */
+  if (next->left_child != NULL)
+    {
+      return 0;
+    }
+
+  /* No left child.  If the weight is non-zero the element has been identified and
+   * the code is finished. */
+  if (next->weight != 0)
+    {
+      return 1;
+    }
+
+  /* Zero weight: index bits follow, unless zero_freq_count is dropping to 0, in
+   * which case there is nothing more to read. */
+  return h->zero_freq_count == 1;
+}
+
+/* Returns the alphabet index of the nth node, counting from 0, on the list of zeros. */
+static int fgk_nth_zero (fgk_stream* h, int n)
+{
+  fgk_node *scan = h->remaining_zeros;
+
+  /* Running off the list means the encoder's zero count is too high.  No error is
+   * raised: the walk stops on the last node and the caller should check integrity. */
+  while (n != 0 && scan->right_child != NULL)
+    {
+      scan = scan->right_child;
+      n -= 1;
+    }
+  return scan - h->alphabet;
+}
+
+/* Once fgk_decode_bit has returned 1, this retrieves the decoded index into the
+ * alphabet, updates the tree and resets the decoder for the next code.  It must not
+ * be called before then: it cannot itself tell whether more bits are required.
+ */
+static int fgk_decode_data (fgk_stream* h)
+{
+  unsigned int elt = h->decode_ptr - h->alphabet;
+
+  if (IS_ADAPTIVE && h->decode_ptr->weight == 0)
+    {
+      /* The buffered bits are the index of a zero frequency element, most
+       * significant bit first.  When only one zero remains no index bits are sent
+       * (coded_depth == 0), so coded_bits is not read at all and n stays 0. */
+      unsigned int n = 0;
+      int i;
+      for (i = 0; i < h->coded_depth; i += 1)
+	{
+	  n = (n << 1) | h->coded_bits[i];
+	}
+
+      elt = fgk_nth_zero (h, n);
+    }
+
+  h->coded_depth = 0;
+
+  if (IS_ADAPTIVE)
+    {
+      fgk_update_tree (h, elt);
+    }
+
+  h->decode_ptr = h->root_node;
+  return elt;
+}
+
+/* Frees h and its arrays; NULL is ignored.  fgk_alloc also calls this after a partial allocation. */
+static void fgk_destroy (xd3_stream *stream, fgk_stream *h)
+{
+  if (h != NULL)
+    {
+      /* Dump of the element weights; skipped when the alphabet array is NULL. */
+      IF_DEBUG1({
+	int i;
+	for (i = 0; h->alphabet != NULL && i < ALPHABET_SIZE; i += 1)
+	  {
+	    XP(OF, "freq[%u] = %u\n", i, h->alphabet[i].weight);
+	  }
+      });
+      xd3_free (stream, h->alphabet);
+      xd3_free (stream, h->coded_bits);
+      xd3_free (stream, h->block_array);
+      xd3_free (stream, h);
+    }
+}
+
+/*********************************************************************/
+/* 			       Xdelta                                */
+/*********************************************************************/
+
+/* FGK-encodes every byte of the input pages into output and flushes the final bits. */
+static int
+xd3_encode_fgk (xd3_stream *stream, fgk_stream *sec_stream, xd3_output *input, xd3_output *output, xd3_sec_cfg *cfg)
+{
+  bit_state   bstate = BIT_STATE_ENCODE_INIT;
+  xd3_output *page;
+  int ret;
+
+  /* OPT: quit compression early if it looks bad */
+  for (page = input; page != NULL; page = page->next_page)
+    {
+      const uint8_t *cur = page->base;
+      const uint8_t *const end = cur + page->next;
+      for (; cur < end; cur += 1)
+	{
+	  /* Code one input byte, then write out each bit of its code. */
+	  usize_t left = fgk_encode_data (sec_stream, *cur);
+	  for (; left != 0; left -= 1)
+	    {
+	      if ((ret = xd3_encode_bit (stream, & output, & bstate, fgk_get_encoded_bit (sec_stream)))) { return ret; }
+	    }
+	}
+    }
+
+  return xd3_flush_bits (stream, & output, & bstate);
+}
+
+static int
+xd3_decode_fgk (xd3_stream     *stream,
+		fgk_stream     *sec_stream,
+		const uint8_t **input_pos,
+		const uint8_t  *const input_max,
+		uint8_t       **output_pos,
+		const uint8_t  *const output_max)
+{
+  bit_state bstate;
+  const uint8_t *src = *input_pos;
+  uint8_t       *dst = *output_pos;
+
+  /* Decodes input bits until the output range is full, then returns 0 and advances
+   * both positions.  NOTE(review): fullness is only tested after a byte is stored,
+   * so an empty range (*output_pos == output_max) looks unsupported -- confirm. */
+  while (src != input_max)
+    {
+      bstate.cur_byte = *src++;
+      /* Bits are taken from the least significant end of each input byte. */
+      for (bstate.cur_mask = 1; bstate.cur_mask != 0x100; bstate.cur_mask <<= 1)
+	{
+	  const int bit = (bstate.cur_byte & bstate.cur_mask) != 0;
+
+	  if (fgk_decode_bit (sec_stream, bit))
+	    {
+	      *dst++ = fgk_decode_data (sec_stream);
+	      if (unlikely (dst == output_max))
+		{
+		  /* During regression testing: */
+		  IF_REGRESSION ({
+		    int ret;
+		    bstate.cur_mask <<= 1;
+		    if ((ret = xd3_test_clean_bits (stream, & bstate))) { return ret; }
+		  });
+
+		  (*output_pos) = dst;
+		  (*input_pos)  = src;
+		  return 0;
+		}
+	    }
+	}
+    }
+
+  stream->msg = "secondary decoder end of input";
+  return EINVAL;
+}
+
+#endif /* _XDELTA3_FGK_h_ */
diff --git a/xdelta3/xdelta3-list.h b/xdelta3/xdelta3-list.h
new file mode 100755
index 0000000..64a2582
--- /dev/null
+++ b/xdelta3/xdelta3-list.h
@@ -0,0 +1,130 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2002 and onward.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __XDELTA3_LIST__
+#define __XDELTA3_LIST__
+
+#define XD3_MAKELIST(LTYPE,ETYPE,LNAME)                                                   \
+  /* LTYPE_entry: the ETYPE whose LNAME member is the list node l. */                     \
+static inline ETYPE*                                                                      \
+LTYPE ## _entry (LTYPE* l)                                                                \
+{                                                                                         \
+  return (ETYPE*) ((char*) l - (unsigned long) &((ETYPE*) 0)->LNAME);                     \
+}                                                                                         \
+  /* LTYPE_init: makes l an empty list, with next and prev pointing at itself. */         \
+static inline void                                                                        \
+LTYPE ## _init (LTYPE *l)                                                                 \
+{                                                                                         \
+  l->next = l;                                                                            \
+  l->prev = l;                                                                            \
+}                                                                                         \
+  /* LTYPE_add: links ins between prev and next. */                                       \
+static inline void                                                                        \
+LTYPE ## _add (LTYPE *prev, LTYPE *next, LTYPE *ins)                                      \
+{                                                                                         \
+  next->prev = ins;                                                                       \
+  prev->next = ins;                                                                       \
+  ins->next  = next;                                                                      \
+  ins->prev  = prev;                                                                      \
+}                                                                                         \
+  /* LTYPE_push_back: links i in just before the head l. */                               \
+static inline void                                                                        \
+LTYPE ## _push_back (LTYPE *l, ETYPE *i)                                                  \
+{                                                                                         \
+  LTYPE ## _add (l->prev, l, & i->LNAME);                                                 \
+}                                                                                         \
+  /* LTYPE_del: links next and prev to each other; note the argument order. */            \
+static inline void                                                                        \
+LTYPE ## _del (LTYPE *next,                                                               \
+	       LTYPE *prev)                                                               \
+{                                                                                         \
+  next->prev = prev;                                                                      \
+  prev->next = next;                                                                      \
+}                                                                                         \
+  /* LTYPE_remove: unlinks f and returns the entry of the node that followed it. */       \
+static inline ETYPE*                                                                      \
+LTYPE ## _remove (ETYPE *f)                                                               \
+{                                                                                         \
+  LTYPE *i = f->LNAME.next;                                                               \
+  LTYPE ## _del (f->LNAME.next, f->LNAME.prev);                                           \
+  return LTYPE ## _entry (i);                                                             \
+}                                                                                         \
+  /* LTYPE_pop_back: unlinks l->prev and returns its entry; no emptiness check. */        \
+static inline ETYPE*                                                                      \
+LTYPE ## _pop_back (LTYPE *l)                                                             \
+{                                                                                         \
+  LTYPE *i = l->prev;                                                                     \
+  LTYPE ## _del (i->next, i->prev);                                                       \
+  return LTYPE ## _entry (i);                                                             \
+}                                                                                         \
+  /* LTYPE_pop_front: unlinks l->next and returns its entry; no emptiness check. */       \
+static inline ETYPE*                                                                      \
+LTYPE ## _pop_front (LTYPE *l)                                                            \
+{                                                                                         \
+  LTYPE *i = l->next;                                                                     \
+  LTYPE ## _del (i->next, i->prev);                                                       \
+  return LTYPE ## _entry (i);                                                             \
+}                                                                                         \
+  /* LTYPE_empty: true when the head l points at itself. */                               \
+static inline int                                                                         \
+LTYPE ## _empty (LTYPE *l)                                                                \
+{                                                                                         \
+  return l == l->next;                                                                    \
+}                                                                                         \
+  /* LTYPE_front: entry of f->next, without unlinking it. */                              \
+static inline ETYPE*                                                                      \
+LTYPE ## _front (LTYPE *f)                                                                \
+{                                                                                         \
+  return LTYPE ## _entry (f->next);                                                       \
+}                                                                                         \
+  /* LTYPE_back: entry of f->prev, without unlinking it. */                               \
+static inline ETYPE*                                                                      \
+LTYPE ## _back (LTYPE *f)                                                                 \
+{                                                                                         \
+  return LTYPE ## _entry (f->prev);                                                       \
+}                                                                                         \
+  /* LTYPE_end: true when i's list node is the head f itself. */                          \
+static inline int                                                                         \
+LTYPE ## _end (LTYPE *f, ETYPE *i)                                                        \
+{                                                                                         \
+  return f == & i->LNAME;                                                                 \
+}                                                                                         \
+  /* LTYPE_next: entry of the node following f's list node. */                            \
+static inline ETYPE*                                                                      \
+LTYPE ## _next (ETYPE *f)                                                                 \
+{                                                                                         \
+  return LTYPE ## _entry (f->LNAME.next);                                                 \
+}                                                                                         \
+  /* LTYPE_length: counts the nodes between l->next and l by walking the list. */         \
+static inline int                                                                         \
+LTYPE ## _length (LTYPE *l)                                                               \
+{                                                                                         \
+  LTYPE *p;                                                                               \
+  int c = 0;                                                                              \
+                                                                                          \
+  for (p = l->next; p != l; p = p->next)                                                  \
+    {                                                                                     \
+      c += 1;                                                                             \
+    }                                                                                     \
+                                                                                          \
+  return c;                                                                               \
+}                                                                                         \
+  /* The typedef has no semicolon, so the macro invocation supplies it. */                \
+typedef int unused_ ## LTYPE
+
+#endif
+
+#endif
diff --git a/xdelta3/xdelta3-main.h b/xdelta3/xdelta3-main.h
new file mode 100755
index 0000000..29469c3
--- /dev/null
+++ b/xdelta3/xdelta3-main.h
@@ -0,0 +1,2923 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2001 and onward.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* This is all the extra stuff you need for convenience to users in a command line
+ * application.  It contains these major components:
+ *
+ * 1. VCDIFF tools
+ * 2. external compression support (this is POSIX-specific).
+ * 3. a general read/write loop that handles all of the Xdelta decode/encode/VCDIFF-print
+ *    functions
+ * 4. command-line interpreter
+ * 5. an Xdelta application header which stores default filename, external compression settings
+ * 6. output/error printing
+ * 7. basic file support and OS interface
+ */
+
+/* Definite TODO list:
+ * 1. do exact gzip-like filename, stdout handling.  make a .xz extension, refuse
+ *    to encode to stdout without -cf, etc.
+ * 2. Allow the user to add a comment string to the app header without disturbing the default
+ *    behavior.
+ * 3. Define zero-length window behavior
+ * 4. Separate getopt() code from main and make flags modular, implement help.
+ * 5. Catch up on related research!
+ */
+
+/* Nice idea TODO list:
+ *
+ * 1. Should probably have a write buffer (option)?
+ * 2. Add a reporting function for displaying progress, warning messages out of the library.
+ * 3. Add WIN32 support in addition to XD3_POSIX/XD3_STDIO.  Should almost compile under windows
+ *    with XD3_STDIO, but not quite (e.g., stat()?).
+ * 4. Update-in-place, partial-encoding per the latest RFC: see "Wishful TODO" comments below
+ */
+
+/* On error handling and printing:
+ *
+ * The xdelta library sets stream->msg to indicate what condition caused an internal
+ * failure, but many failures originate here and are printed here.  The return convention
+ * is 0 for success, as throughout Xdelta code, but special attention is required here for
+ * the operating system calls with different error handling.  See the main_file_* routines.
+ * All errors in this file have a message printed at the time of occurrence.  Since some of
+ * these calls occur within calls to the library, the error may end up being printed again
+ * with a more general error message.
+ */
+
+/******************************************************************************************/
+
+#ifndef XD3_POSIX
+#define XD3_POSIX 0
+#endif
+#ifndef XD3_STDIO
+#define XD3_STDIO 0
+#endif
+#ifndef XD3_WIN32
+#define XD3_WIN32 0
+#endif
+
+/* XPR (used by main) prints to stderr; NT supplies the "xdelta3: " prefix. */
+#if 0 && XD3_DEBUG
+#define XPR fprintf (stderr, "xdelta3[%u]: ", getpid()); fprintf
+#define NT stderr,
+#else
+#define XPR fprintf
+#define NT stderr, "xdelta3: "
+#endif
+
+#define VC fprintf
+#define OUT vcout,
+
+/* If none are set, default to posix. */
+#if (XD3_POSIX + XD3_STDIO + XD3_WIN32) == 0
+#undef XD3_POSIX
+#define XD3_POSIX 1
+#endif
+
+/* Handle externally-compressed inputs. */
+#ifndef EXTERNAL_COMPRESSION
+#define EXTERNAL_COMPRESSION 1
+#endif
+
+#define PRINTHDR_SPECIAL -4378291
+
+#define PIPE_BUFSIZE (usize_t)(1 << 12)
+#define MIN_BUFSIZE (usize_t)(1 << 12)
+
+/* The number of soft-config variables.  Update as field count changes! */
+#define XD3_SOFTCFG_VARCNT 10
+
+/* this is used as in XPR(NT XD3_LIB_ERRMSG (stream, ret)) to print an error message
+ * from the library. */
+#define XD3_LIB_ERRMSG(stream, ret) "%s: %s\n", xd3_errstring (stream), xd3_strerror (ret)
+
+#include <stdio.h>  /* fprintf */
+#include <unistd.h> /* lots */
+
+#if XD3_POSIX
+#include <unistd.h> /* close, read, write... */
+#include <sys/types.h>
+#include <fcntl.h>
+#endif
+
+#include <sys/time.h> /* gettimeofday() */
+#include <sys/stat.h> /* stat() and fstat() */
+
+/******************************************************************************************
+ ENUMS and TYPES
+ ******************************************************************************************/
+
+/* These flags (mainly pertaining to main_read() operations) are set in the
+ * main_file->flags variable.  All are related to external decompression support.
+ *
+ * RD_FIRST causes the external decompression check when the input is first read.
+ *
+ * RD_NONEXTERNAL disables external decompression for reading a compressed input, in the
+ * case of Xdelta inputs.  Note: Xdelta is itself supported as an external compression
+ * type, which is the reason for this flag.  An example to justify this: to create a
+ * delta between two files that are VCDIFF-compressed, two external Xdelta decoders are
+ * run to supply decompressed source and target inputs to the Xdelta encoder. */
+typedef enum
+{
+  RD_FIRST        = (1 << 0),
+  RD_NONEXTERNAL  = (1 << 1),
+} xd3_read_flags;
+
+/* main_file->mode values; the XOPEN_* macros test only (mode == XO_READ). */
+typedef enum
+{
+  XO_READ  = 0,   /* opened with "rb" (stdio) or O_RDONLY (posix) */
+  XO_WRITE = 1,   /* opened with "wb" or O_WRONLY | O_CREAT | O_TRUNC, mode 0666 */
+} main_file_modes;
+
+/* Main commands.  For example, CMD_PRINTHDR is the "xdelta printhdr" command. */
+typedef enum
+{
+  CMD_NONE = 0,
+  CMD_PRINTHDR,
+  CMD_PRINTHDRS,
+  CMD_PRINTDELTA,
+#if XD3_ENCODER
+  CMD_ENCODE,     /* exists only with XD3_ENCODER; later values shift without it */
+#endif
+  CMD_DECODE,
+  CMD_TEST,
+  CMD_CONFIG,
+} xd3_cmd;
+
+#if XD3_ENCODER
+#define CMD_DEFAULT CMD_ENCODE
+#define IS_ENCODE(cmd) (cmd == CMD_ENCODE)
+#else
+#define CMD_DEFAULT CMD_DECODE
+#define IS_ENCODE(cmd) (0)
+#endif
+
+typedef struct _main_file        main_file;
+typedef struct _main_extcomp     main_extcomp;
+typedef struct _main_blklru      main_blklru;
+typedef struct _main_blklru_list main_blklru_list;
+
+/* The main_file object supports abstract system calls like open, close, read, write, seek,
+ * stat.  The program uses these to represent both seekable files and non-seekable files.
+ * Source files must be seekable, but the target input and any output file do not require
+ * seekability.
+ */
+struct _main_file
+{
+#if XD3_STDIO
+  FILE               *file;       /* NULL when closed (see main_file_isopen) */
+#elif XD3_POSIX
+  int                 file;       /* -1 when closed (see main_file_isopen) */
+#endif
+
+  int                 mode;       /* XO_READ or XO_WRITE */
+  const char         *filename;   /* File name or /dev/stdin, /dev/stdout, /dev/stderr. */
+  const char         *realname;   /* The name main_file_open() actually opened. */
+  const main_extcomp *compressor; /* External compression struct. */
+  int                 flags;      /* RD_FIRST or RD_NONEXTERNAL */
+  xoff_t              nread;      /* for input position; main_file_read() adds to it */
+  xoff_t              nwrite;     /* for output position; main_file_write() adds to it */
+};
+
+/* Various strings and magic values used to detect and call external compression.  See
+ * the extcomp_types[] table below for examples. */
+struct _main_extcomp
+{
+  const char    *recomp_cmdname;   /* e.g. "gzip" */
+  const char    *recomp_options;   /* e.g. "-cf" */
+
+  const char    *decomp_cmdname;   /* e.g. "gzip", or "uncompress" for compress */
+  const char    *decomp_options;   /* e.g. "-dcf" */
+
+  const char    *ident;            /* short identifier, e.g. "G" for gzip */
+  const char    *magic;            /* leading bytes of a compressed input */
+  int            magic_size;       /* number of bytes in magic */
+  int            flags;            /* 0 or RD_NONEXTERNAL in extcomp_types[] */
+};
+
+/* This file implements a small LRU of source blocks.  For encoding purposes,
+ * we prevent paging in blocks we've already scanned in the source (return
+ * XD3_NOTAVAIL). */
+struct _main_blklru_list
+{
+  main_blklru_list  *next;
+  main_blklru_list  *prev;
+};
+
+struct _main_blklru
+{
+  uint8_t         *blk;
+  xoff_t           blkno;
+  main_blklru_list  link;
+};
+
+/* ... represented as a list (no cache index). */
+XD3_MAKELIST(main_blklru_list,main_blklru,link);
+
+/* Program options: various command line flags and options. */
+static int         option_stdout             = 0;
+static int         option_force              = 0;
+static int         option_verbose            = 0;
+static int         option_quiet              = 0;
+static int         option_level              = 6;
+static int         option_use_appheader      = 1;
+static uint8_t*    option_appheader          = NULL;
+static int         option_use_secondary      = /* until-standardized, leave this off */ 0;
+static char*       option_secondary          = NULL;
+static int         option_use_checksum       = 1;
+static int         option_use_altcodetable   = 0;
+static char*       option_smatch_config      = NULL;
+static int         option_no_compress        = 0;
+static int         option_no_output          = 0; /* go through the motions, but do not open or write output */
+static const char *option_source_filename    = NULL;
+
+static usize_t     option_winsize            = XD3_DEFAULT_WINSIZE;
+static usize_t     option_srcwinsz           = XD3_DEFAULT_SRCWINSZ;
+
+/* Wishful TODO: Support should probably be for partial deltas & update-in-place deltas,
+ * following the latest draft RFC specs partial deltas [the changes have moderate
+ * complexity].  The following flags implement primitive controls to skip sections
+ * of the input & output, mainly for debugging purposes. */
+
+/* DECODE-ONLY: Skips processing windows up to first_window and past last_window using the
+ * XD3_SKIP_WINDOW flag, but main_ still reads/parses every window.  TODO: make it
+ * meaningful for encode, etc... */
+/*static xoff_t      option_first_window = 0;*/
+/*static xoff_t      option_last_window  = XOFF_T_MAX;*/
+
+/* ENCODE-ONLY: Seeks to first_offset, EOF at last_offset, done entirely in these main_
+ * routines, so the library actually sees a shortened input.  TODO: implement this for
+ * decode, implement proper partial deltas, works with external compression?, works with
+ * non-seekable inputs?, change ranges, etc... */
+/*static xoff_t      option_first_offset = 0;*/
+/*static xoff_t      option_last_offset  = XOFF_T_MAX;*/
+
+/* This controls the number of times main repeats itself, only for profiling. */
+static int option_profile_cnt = 0;
+
+/* These variables are suppressed to avoid their use w/o support.  main() warns
+ * appropriately. */
+#if EXTERNAL_COMPRESSION
+static int         option_decompress_inputs  = 1;
+static int         option_recompress_outputs = 1;
+#endif
+
+/* This is for comparing "printdelta" output without attention to
+ * copy-instruction modes, useful for reverse engineering. */
+#if VCDIFF_TOOLS
+static int         option_print_cpymode = 1;
+#endif
+
+/* Static variables */
+IF_DEBUG(static int main_mallocs = 0;)
+
+static char*          program_name = NULL;
+static uint8_t*       appheader_used = NULL;
+static uint8_t*       main_bdata = NULL;
+
+/* The LRU: obviously this is shared by all callers. */
+static int               lru_size = 0;
+static main_blklru      *lru = NULL;  /* array of lru_size elts */
+static main_blklru_list  lru_list;
+static main_blklru_list  lru_free;
+static int               do_not_lru = 0;  /* set to avoid lru, instead discard oldest */
+
+static int lru_hits   = 0;
+static int lru_misses = 0;
+static int lru_filled = 0;
+
+/* Hacks for VCDIFF tools */
+static int allow_fake_source = 0;
+
+/* This array of compressor types is compiled even if EXTERNAL_COMPRESSION is false just so
+ * the program knows the mapping of IDENT->NAME. */
+static main_extcomp extcomp_types[] =
+{
+  /* The entry for xdelta must be first because the program_name is set here. */
+  { "xdelta3",  "-cfq",  "xdelta3",    "-dcfq",  "X", "\xd6\xc3\xc4", 3, RD_NONEXTERNAL },
+  { "bzip2",    "-cf",   "bzip2",      "-dcf",   "B", "BZh",          3, 0 },
+  { "gzip",     "-cf",   "gzip",       "-dcf",   "G", "\037\213",     2, 0 },
+  { "compress", "-cf",   "uncompress", "-cf",    "Z", "\037\235",     2, 0 },
+};
+
+static void main_get_appheader (xd3_stream *stream, main_file *output, main_file *sfile);
+
+static int main_help (void);
+
+/* Emits the VERSION= line through P(RINT ...); always returns EXIT_SUCCESS. */
+static int main_version (void)
+{
+  P(RINT "VERSION=3_PRERFC_0\n");
+  return EXIT_SUCCESS;
+}
+
+static int
+main_config (void)
+{
+  main_version ();   /* the VERSION= line comes first */
+  /* Compile-time feature switches, one NAME=value line each. */
+  P(RINT "VCDIFF_TOOLS=%d\n", VCDIFF_TOOLS);
+  P(RINT "REGRESSION_TEST=%d\n", REGRESSION_TEST);
+  P(RINT "SECONDARY_FGK=%d\n", SECONDARY_FGK);
+  P(RINT "SECONDARY_DJW=%d\n", SECONDARY_DJW);
+  P(RINT "GENERIC_ENCODE_TABLES=%d\n", GENERIC_ENCODE_TABLES);
+  P(RINT "GENERIC_ENCODE_TABLES_COMPUTE=%d\n", GENERIC_ENCODE_TABLES_COMPUTE);
+  P(RINT "EXTERNAL_COMPRESSION=%d\n", EXTERNAL_COMPRESSION);
+  P(RINT "XD3_POSIX=%d\n", XD3_POSIX);
+  P(RINT "XD3_DEBUG=%d\n", XD3_DEBUG);
+  P(RINT "XD3_USE_LARGEFILE64=%d\n", XD3_USE_LARGEFILE64);
+  P(RINT "XD3_ENCODER=%d\n", XD3_ENCODER);
+  /* Size constants.  NOTE(review): all use %d -- confirm each macro is int-typed. */
+  P(RINT "XD3_DEFAULT_WINSIZE=%d\n", XD3_DEFAULT_WINSIZE);
+  P(RINT "XD3_DEFAULT_SRCBLKSZ=%d\n", XD3_DEFAULT_SRCBLKSZ);
+  P(RINT "XD3_DEFAULT_SRCWINSZ=%d\n", XD3_DEFAULT_SRCWINSZ);
+  P(RINT "XD3_DEFAULT_MEMSIZE=%d\n", XD3_DEFAULT_MEMSIZE);
+  P(RINT "XD3_ALLOCSIZE=%d\n", XD3_ALLOCSIZE);
+  P(RINT "XD3_HARDMAXWINSIZE=%d\n", XD3_HARDMAXWINSIZE);
+  P(RINT "XD3_NODECOMPRESSSIZE=%d\n", XD3_NODECOMPRESSSIZE);
+  P(RINT "XD3_DEFAULT_IOPT_SIZE=%d\n", XD3_DEFAULT_IOPT_SIZE);
+  P(RINT "XD3_DEFAULT_SPREVSZ=%d\n", XD3_DEFAULT_SPREVSZ);
+
+  return EXIT_SUCCESS;
+}
+
+/* malloc() wrapper: reports failure, and logs each size when option_verbose > 2. */
+static void* main_malloc1 (usize_t size)
+{
+  void* mem = malloc (size);
+  if (mem != NULL) { if (option_verbose > 2) { XPR(NT "malloc: %u\n", size); } }
+  else             { XPR(NT "malloc: %s\n", xd3_strerror (ENOMEM)); }
+  return mem;
+}
+
+/* Like main_malloc1(), and counts each success in main_mallocs under IF_DEBUG. */
+static void* main_malloc (usize_t size)
+{
+  void *mem = main_malloc1 (size);
+  if (mem != NULL) { IF_DEBUG (main_mallocs += 1); }
+  return mem;
+}
+
+/* Array-style allocator: ITEMS elements of SIZE bytes via main_malloc1(); OPAQUE unused. */
+static void* main_alloc (void *opaque, usize_t items, usize_t size)
+{
+  /* Refuse requests whose byte count would wrap around usize_t. */
+  if (size != 0 && items > USIZE_T_MAX / size) { return NULL; }
+  return main_malloc1 (items * size);
+}
+
+/* Releases *ptr if it is set, un-counts it under IF_DEBUG, and clears the
+ * caller's pointer so that a repeated call is a no-op. */
+static void
+main_free (void **ptr)
+{
+  if (*ptr == NULL) { return; }
+  IF_DEBUG (main_mallocs -= 1);
+  free (*ptr);
+  (*ptr) = NULL;
+}
+
+/* Plain free() taking an unused OPAQUE; presumably the counterpart of main_alloc(). */
+static void main_free1 (void *opaque, void *ptr)
+{
+  free (ptr);
+}
+
+/* Returns errno for use as a failure code.  Callers write (ret = errno) and treat
+ * the result as "failed", so a zero errno would be mistaken for success: it is
+ * reported as a bug and replaced with EINVAL. */
+static int
+get_errno (void)
+{
+  if (errno != 0) { return errno; }
+
+  XPR(NT "you found a bug: expected errno != 0\n");
+  errno = EINVAL;
+  return errno;
+}
+
+/* Current time as reported by gettimeofday(), converted to milliseconds. */
+static long
+get_millisecs_now (void)
+{
+  struct timeval now;
+
+  gettimeofday (& now, NULL);
+  return (now.tv_sec) * 1000L + (now.tv_usec) / 1000;
+}
+
+/* Milliseconds elapsed since the previous call, never less than 1.  The static
+ * `init` starts out zeroed, so the very first call measures from time zero. */
+static long
+get_millisecs_since (void)
+{
+  static struct timeval init;   /* timestamp taken by the previous call */
+  struct timeval now;
+  double elapsed_us;
+  long elapsed_ms;
+
+  gettimeofday (& now, NULL);
+
+  elapsed_us  = (now.tv_sec - init.tv_sec) * 1e6;
+  elapsed_us += (now.tv_usec - init.tv_usec);
+  init = now;
+
+  elapsed_ms = (long) (elapsed_us / 1000);
+  return max (elapsed_ms, 1L);
+}
+
+/* Formats byte count R into BUF as "<n> <unit>", dividing by 1000 per unit step
+ * until the value is below 10000 or the last unit (GB) is reached.  Returns BUF. */
+static char*
+main_format_bcnt (xoff_t r, char *buf)
+{
+  static const char* fmts[] = { "B", "KB", "MB", "GB" };
+  int unit = 0;
+
+  while (unit != -1 + SIZEOF_ARRAY(fmts) && ! (r < 10 * 1e3))
+    {
+      r /= 1000;
+      unit += 1;
+    }
+
+  sprintf (buf, "%"Q"u %s", r, fmts[unit]);
+  return buf;
+}
+
+/* Formats the rate BYTES per MILLIS into BUF as "<count> <unit>/sec" and returns BUF.
+ * MILLIS is clamped to >= 1 so a zero or negative interval cannot divide by zero. */
+static char* main_format_rate (xoff_t bytes, long millis, char *buf)
+{
+  char lbuf[32];   /* automatic rather than static: nothing needs it to persist */
+  xoff_t r = 1.0 * bytes / (1.0 * max (millis, 1L) / 1000.0);
+  main_format_bcnt (r, lbuf);
+  sprintf (buf, "%s/sec", lbuf);
+  return buf;
+}
+
+/* Formats MILLIS into BUF as "N ms", "N.N sec" or "N sec"; %ld matches the signed long. */
+static char* main_format_millis (long millis, char *buf)
+{
+  if (millis < 1000)       { sprintf (buf, "%ld ms", millis); }
+  else if (millis < 10000) { sprintf (buf, "%.1f sec", millis / 1000.0); }
+  else                     { sprintf (buf, "%ld sec", millis / 1000L); }
+  return buf;
+}
+
+/* A safe version of strtol for xoff_t.  Parses all of S (base chosen by its prefix,
+ * i.e. strtol base 0) into *XO.  Returns 0, or EXIT_FAILURE after printing a message
+ * naming option WHICH when S is negative, out of range, or not a whole integer. */
+static int
+main_strtoxoff (const char* s, xoff_t *xo, char which)
+{
+  const char *why = NULL;
+  char *e;
+#if SIZEOF_XOFF_T == 4
+  long xx;
+#else
+  long long xx;
+#endif
+
+  XD3_ASSERT(s && *s != 0);
+  errno = 0;   /* strtol/strtoll report overflow only through errno == ERANGE */
+#if SIZEOF_XOFF_T == 4
+  xx = strtol (s, &e, 0);
+#else
+  xx = strtoll (s, &e, 0);
+#endif
+
+  if (xx < 0)                 { why = "negative integer"; }
+  else if (errno == ERANGE)   { why = "integer out of range"; }
+  else if (e == s || *e != 0) { why = "invalid integer"; }
+
+  if (why != NULL)
+    {
+      XPR(NT "-%c: %s: %s\n", which, why, s);
+      return EXIT_FAILURE;
+    }
+
+  (*xo) = xx;
+  return 0;
+}
+
+/* Parses ARG into *XO, requiring LOW <= value <= USIZE_T_MAX.  WHICH is the option
+ * letter used in diagnostics.  Returns 0, or non-zero after printing a message that
+ * names the violated bound (and, unlike before, ends with a newline). */
+static int
+main_atou (const char* arg, usize_t *xo, usize_t low, char which)
+{
+  xoff_t x;
+  int ret;
+  if ((ret = main_strtoxoff (arg, & x, which))) { return ret; }
+  if (x < low || x > USIZE_T_MAX)
+    {
+      XPR(NT "-%c: %s value: %u\n", which, (x < low) ? "minimum" : "maximum", (x < low) ? low : (usize_t) USIZE_T_MAX);
+      return EXIT_FAILURE;
+    }
+  (*xo) = x; return 0;
+}
+
+/******************************************************************************************
+ FILE BASICS
+ ******************************************************************************************/
+
+/* With all the variation in file system-call semantics, arguments, return values and
+ * error-handling for the POSIX and STDIO file APIs, the insides of these functions make
+ * me sick, which is why these wrappers exist. */
+
+#define XOPEN_OPNAME (xfile->mode == XO_READ ? "read" : "write")
+#define XOPEN_STDIO  (xfile->mode == XO_READ ? "rb" : "wb")
+#define XOPEN_POSIX  (xfile->mode == XO_READ ? O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC)
+#define XOPEN_MODE   (xfile->mode == XO_READ ? 0 : 0666)
+
+#define XF_ERROR(op, name, ret) XPR(NT "file %s failed: %s: %s: %s\n", (op), XOPEN_OPNAME, (name), xd3_strerror (ret))
+
+#if XD3_STDIO
+#define XFNO(f) fileno(f->file)
+#define XSTDOUT_XF(f) { (f)->file = stdout; (f)->filename = "/dev/stdout"; }
+#define XSTDERR_XF(f) { (f)->file = stderr; (f)->filename = "/dev/stderr"; }
+#define XSTDIN_XF(f)  { (f)->file = stdin;  (f)->filename = "/dev/stdin"; }
+
+#elif XD3_POSIX
+#define XFNO(f) f->file
+#define XSTDOUT_XF(f) { (f)->file = STDOUT_FILENO; (f)->filename = "/dev/stdout"; }
+#define XSTDERR_XF(f) { (f)->file = STDERR_FILENO; (f)->filename = "/dev/stderr"; }
+#define XSTDIN_XF(f)  { (f)->file = STDIN_FILENO;  (f)->filename = "/dev/stdin"; }
+#endif
+
+/* Resets XFILE to the closed state: every field zeroed and, under XD3_POSIX, the
+ * descriptor set to -1, the value main_file_isopen() treats as "not open". */
+static void main_file_init (main_file *xfile)
+{
+  memset (xfile, 0, sizeof (main_file));
+#if XD3_POSIX
+  xfile->file = -1;
+#endif
+}
+
+/* Non-zero when XFILE holds an open handle: a non-NULL FILE* under XD3_STDIO, or a
+ * descriptor other than -1 under XD3_POSIX. */
+static int main_file_isopen (main_file *xfile)
+{
+#if XD3_STDIO
+  return NULL != xfile->file;
+#elif XD3_POSIX
+  return -1 != xfile->file;
+#endif
+}
+
+/* Closes XFILE if it is open and marks it closed.  Returns 0 on success or when the
+ * file was not open; on failure prints a message and returns the errno value. */
+static int
+main_file_close (main_file *xfile)
+{
+  int status = 0;
+
+  if (! main_file_isopen (xfile)) { return 0; }
+
+#if XD3_STDIO
+  status = fclose (xfile->file);
+  xfile->file = NULL;
+#elif XD3_POSIX
+  status = close (xfile->file);
+  xfile->file = -1;
+#endif
+
+  if (status == 0) { return 0; }
+  status = get_errno ();
+  XF_ERROR ("close", xfile->filename, status);
+  return status;
+}
+
+/* Opens NAME on XFILE in MODE (XO_READ or XO_WRITE); XFILE must not already be open.
+ * On success NAME is recorded as xfile->realname, nread is reset and 0 is returned.
+ * On failure a message is printed and the errno value is returned. */
+static int
+main_file_open (main_file *xfile, const char* name, int mode)
+{
+  int ret = 0;
+
+  /* Set the mode first: the XOPEN_* macros below read xfile->mode. */
+  xfile->mode = mode;
+  XD3_ASSERT (! main_file_isopen (xfile));
+
+#if XD3_STDIO
+  xfile->file = fopen (name, XOPEN_STDIO);
+  if (xfile->file == NULL) { ret = get_errno (); }
+#elif XD3_POSIX
+  {
+    int fd = open (name, XOPEN_POSIX, XOPEN_MODE);
+    if (fd < 0) { ret = get_errno (); }
+    else        { xfile->file = fd; }
+  }
+#endif
+
+  if (ret != 0) { XF_ERROR ("open", name, ret); return ret; }
+
+  xfile->realname = name;
+  xfile->nread = 0;
+  return 0;
+}
+
+/* Stores the size of the open regular file XFILE in *SIZE and returns 0.  Returns the
+ * errno value if fstat() fails, or ESPIPE if XFILE is not a regular file; a message
+ * for either failure is printed only when ERR_IFNOSEEK is non-zero. */
+static int
+main_file_stat (main_file *xfile, xoff_t *size, int err_ifnoseek)
+{
+  struct stat info;
+  int ret = 0;
+
+  XD3_ASSERT (main_file_isopen (xfile));
+
+  if (fstat (XFNO (xfile), & info) < 0)
+    {
+      ret = get_errno ();
+      if (err_ifnoseek) { XF_ERROR ("stat", xfile->filename, ret); }
+    }
+  else if (! S_ISREG (info.st_mode))
+    {
+      ret = ESPIPE;
+      if (err_ifnoseek) { XPR(NT "source file must be seekable: %s\n", xfile->filename); }
+    }
+  else { (*size) = info.st_size; }
+  return ret;
+}
+
+/* Non-zero iff stat() succeeds on xfile->filename and it names a regular file. */
+static int main_file_exists (main_file *xfile)
+{
+  struct stat info;
+  return (stat (xfile->filename, & info) == 0) && S_ISREG (info.st_mode);
+}
+
+#if (XD3_POSIX || EXTERNAL_COMPRESSION)
+/* POSIX-generic code takes a function pointer to read() or write().  This calls the
+ * function repeatedly until the buffer is full or EOF.  The NREAD parameter is not
+ * set for write, NULL is passed.  Return is signed, < 0 indicate errors, otherwise
+ * byte count. */
+typedef int (xd3_posix_func) (int fd, uint8_t *buf, usize_t size);
+
+/* Calls FUNC (read or write) on FD until SIZE bytes of BUF have been processed.
+ * For reads (NREAD != NULL) a zero-byte result is end-of-file: the loop stops and
+ * *NREAD receives the byte count.  EINTR and EAGAIN are retried instead of being
+ * mistaken for end-of-file.  Returns 0, or the errno value of a failed call. */
+static int
+xd3_posix_io (int fd, uint8_t *buf, usize_t size, xd3_posix_func *func, usize_t *nread)
+{
+  usize_t nproc = 0;
+
+  while (nproc < size)
+    {
+      int result = (*func) (fd, buf + nproc, size - nproc);
+
+      if (result < 0)
+        {
+          int ret = get_errno ();
+          if (ret != EAGAIN && ret != EINTR) { return ret; }
+          continue;   /* transient: nothing was transferred, repeat the call */
+        }
+
+      if (nread != NULL && result == 0) { break; }
+      nproc += result;
+    }
+
+  if (nread != NULL) { (*nread) = nproc; }
+  return 0;
+}
+#endif
+
+/* POSIX is unbuffered, while STDIO is buffered.  main_file_read() should always be
+ * called on blocks.
+ *
+ * Reads up to SIZE bytes from IFILE into BUF.  On success the byte count is stored
+ * in *NREAD and added to ifile->nread, and 0 is returned.  On failure MSG is printed
+ * with the file name and error text, and the errno value is returned. */
+static int
+main_file_read (main_file   *ifile,
+                uint8_t     *buf,
+                usize_t      size,
+                usize_t     *nread,
+                const char  *msg)
+{
+  int ret = 0;
+
+#if XD3_STDIO
+  usize_t got = fread (buf, 1, size, ifile->file);
+
+  /* A short count is an error only if the stream's error flag is set; otherwise
+   * it is simply end-of-file. */
+  if (got < size && ferror (ifile->file)) { ret = get_errno (); }
+  else                                    { *nread = got; }
+
+#elif XD3_POSIX
+  /* A non-NULL nread makes xd3_posix_io() stop at end-of-file. */
+  ret = xd3_posix_io (ifile->file, buf, size, (xd3_posix_func*) &read, nread);
+#endif
+
+  if (ret != 0)
+    {
+      XPR(NT "%s: %s: %s\n", msg, ifile->filename, xd3_strerror (ret));
+      return ret;
+    }
+
+  if (option_verbose > 2)
+    {
+      XPR(NT "main read: %s: %u\n", ifile->filename, (*nread));
+    }
+  ifile->nread += (*nread);
+  return 0;
+}
+
+/* Writes all SIZE bytes of BUF to OFILE.  On success SIZE is added to ofile->nwrite
+ * and 0 is returned; on failure MSG is printed with the file name and error text,
+ * and the errno value is returned. */
+static int
+main_file_write (main_file *ofile, uint8_t *buf, usize_t size, const char *msg)
+{
+  int ret = 0;
+
+#if XD3_STDIO
+  if (fwrite (buf, 1, size, ofile->file) != size) { ret = get_errno (); }
+#elif XD3_POSIX
+  /* A NULL nread selects write behaviour: loop until every byte is written. */
+  ret = xd3_posix_io (ofile->file, buf, size, (xd3_posix_func*) &write, NULL);
+#endif
+
+  if (ret != 0)
+    {
+      XPR(NT "%s: %s: %s\n", msg, ofile->filename, xd3_strerror (ret));
+      return ret;
+    }
+
+  if (option_verbose > 2)
+    {
+      XPR(NT "main write: %s: %u\n", ofile->filename, size);
+    }
+  ofile->nwrite += size;
+  return 0;
+}
+
+/* Positions XFILE at absolute offset POS.  Returns 0, or prints a message and returns
+ * the errno value when the seek fails.
+ * NOTE(review): the stdio branch hands POS to fseek(), which takes a long offset. */
+static int
+main_file_seek (main_file *xfile, xoff_t pos)
+{
+#if XD3_STDIO
+  const int failed = (fseek (xfile->file, pos, SEEK_SET) != 0);
+#else
+  const int failed = (lseek (xfile->file, pos, SEEK_SET) != pos);
+#endif
+  const int ret = failed ? get_errno () : 0;
+  if (ret != 0)
+    {
+      XPR(NT "seek failed: %s: %s\n", xfile->filename, xd3_strerror (ret));
+    }
+  return ret;
+}
+
+/******************************************************************************************
+ VCDIFF TOOLS
+ ******************************************************************************************/
+
+#if VCDIFF_TOOLS
+/* This function prints a single VCDIFF window, mainly for debugging purposes.  It
+ * decodes each instruction of the window's instruction section and prints one table
+ * row per instruction to VCOUT, then cross-checks the decoder's bookkeeping.  Returns
+ * 0, the error from xd3_decode_instruction(), or EINVAL on an inconsistency. */
+static int
+main_print_window (xd3_stream* stream, FILE *vcout)
+{
+  int ret;
+  usize_t size = 0;   /* sum of the instruction sizes printed so far */
+
+  VC(OUT "  Offset Code Type1 Size1 @Addr1 + Type2 Size2 @Addr2\n");
+
+  while (stream->inst_sect.buf < stream->inst_sect.buf_max)
+    {
+      uint   code = stream->inst_sect.buf[0];   /* read before decoding; "Code" column */
+
+      if ((ret = xd3_decode_instruction (stream))) { return ret; }
+
+      VC(OUT "  %06"Q"u %03u  %s %3u", stream->dec_winstart + size, code,
+               xd3_rtype_to_string (stream->dec_current1.type, option_print_cpymode),
+               stream->dec_current1.size);
+
+      if (stream->dec_current1.type != XD3_NOOP)
+        {
+          size += stream->dec_current1.size;
+          if (stream->dec_current1.type >= XD3_CPY)
+            {
+              VC(OUT " @%-6u", stream->dec_current1.addr);
+            }
+          else
+            {
+              VC(OUT "        ");
+            }
+        }
+
+      if (stream->dec_current2.type != XD3_NOOP)
+        {
+          size += stream->dec_current2.size;
+          VC(OUT "  %s %3u",
+                   xd3_rtype_to_string (stream->dec_current2.type, option_print_cpymode),
+                   stream->dec_current2.size);
+
+          if (stream->dec_current2.type >= XD3_CPY)
+            {
+              VC(OUT " @%-6u", stream->dec_current2.addr);
+            }
+        }
+
+      VC(OUT "\n");
+    }
+
+  /* XPR is plain fprintf, so each diagnostic must carry its own trailing newline. */
+  if (stream->dec_tgtlen != size && (stream->flags & XD3_SKIP_WINDOW) == 0)
+    {
+      XPR(NT "target window size inconsistency\n");
+      return EINVAL;
+    }
+  if (stream->dec_position != stream->dec_maxpos)
+    {
+      XPR(NT "target window position inconsistency\n");
+      return EINVAL;
+    }
+  if (stream->addr_sect.buf != stream->addr_sect.buf_max)
+    {
+      XPR(NT "address section inconsistency\n");
+      return EINVAL;
+    }
+  IF_DEBUG (VC(OUT "SIZE=%u  TGTLEN=%u\n", size, stream->dec_tgtlen));
+  return 0;
+}
+
+/* Prints FILE's name and external compressor, when set, to VCOUT; TYPE labels both. */
+static void main_print_vcdiff_file (main_file *file, const char *type, FILE *vcout)
+{
+  if (file->filename != NULL)   { VC(OUT "XDELTA filename (%s):     %s\n", type, file->filename); }
+  if (file->compressor != NULL) { VC(OUT "XDELTA ext comp (%s):     %s\n", type, file->compressor->recomp_cmdname); }
+}
+
+/* This function prints a VCDIFF input, mainly for debugging purposes.  The file-level
+ * header is printed only when dec_winstart == 0; the window header always; and the
+ * instruction table unless XD3_JUST_HDR or XD3_SKIP_WINDOW is set.  Returns 0,
+ * PRINTHDR_SPECIAL after a header-only print, or an error code. */
+static int
+main_print_func (xd3_stream* stream, main_file *xfile)
+{
+  int ret;
+  FILE *vcout;
+#if XD3_POSIX
+  /* Print through a private FILE* on a dup()ed descriptor, so that closing it at
+   * the end flushes the text without closing xfile->file. */
+  int vcfd = dup (xfile->file);
+  if (vcfd < 0 || ! (vcout = fdopen (vcfd, "w")))
+    {
+      ret = get_errno ();
+      if (vcfd >= 0) { close (vcfd); }   /* fdopen failed: do not leak the dup */
+      XPR(NT "fdopen: %s: %s\n", xfile->filename, xd3_strerror (ret));
+      return ret;
+    }
+#elif XD3_STDIO
+  vcout = xfile->file;
+#endif
+  XD3_ASSERT (vcout);
+  if (stream->dec_winstart == 0)
+    {
+      VC(OUT "VCDIFF version:               0\n");
+      VC(OUT "VCDIFF header size:           %d\n", stream->dec_hdrsize);
+      VC(OUT "VCDIFF header indicator:      ");
+      if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0) VC(OUT "VCD_SECONDARY ");
+      if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) VC(OUT "VCD_CODETABLE ");
+      if ((stream->dec_hdr_ind & VCD_APPHEADER) != 0) VC(OUT "VCD_APPHEADER ");
+      if (stream->dec_hdr_ind == 0) VC(OUT "none");
+      VC(OUT "\n");
+      IF_SEC(VC(OUT "VCDIFF secondary compressor:  %s\n", stream->sec_type ? stream->sec_type->name : "none"));
+      IF_NSEC(VC(OUT "VCDIFF secondary compressor: unsupported\n"));
+
+      if (stream->dec_hdr_ind & VCD_APPHEADER)
+        {
+          uint8_t *apphead;
+          usize_t appheadsz;
+          ret = xd3_get_appheader (stream, & apphead, & appheadsz);
+          if (ret == 0 && appheadsz > 0)
+            {
+              int sq = option_quiet;
+              main_file o, s;
+              XD3_ASSERT (apphead != NULL);
+              VC(OUT "VCDIFF application header:    ");
+              fwrite (apphead, 1, appheadsz, vcout);
+              VC(OUT "\n");
+              main_file_init (& o);
+              main_file_init (& s);
+              option_quiet = 1;
+              main_get_appheader (stream, & o, & s);
+              option_quiet = sq;
+              main_print_vcdiff_file (& o, "output", vcout);
+              main_print_vcdiff_file (& s, "source", vcout);
+            }
+        }
+    }
+  else
+    {
+      VC(OUT "\n");
+    }
+
+  VC(OUT "VCDIFF window number:         %"Q"u\n", stream->current_window);
+  VC(OUT "VCDIFF window indicator:      ");
+  if ((stream->dec_win_ind & VCD_SOURCE) != 0) VC(OUT "VCD_SOURCE ");
+  if ((stream->dec_win_ind & VCD_TARGET) != 0) VC(OUT "VCD_TARGET ");
+  if ((stream->dec_win_ind & VCD_ADLER32) != 0) VC(OUT "VCD_ADLER32 ");
+  if (stream->dec_win_ind == 0) VC(OUT "none");
+  VC(OUT "\n");
+
+  if ((stream->dec_win_ind & VCD_ADLER32) != 0) { VC(OUT "VCDIFF adler32 checksum:      %08X\n", stream->dec_adler32); }
+
+  if (stream->dec_del_ind != 0)
+    {
+      VC(OUT "VCDIFF delta indicator:       ");
+      if ((stream->dec_del_ind & VCD_DATACOMP) != 0) VC(OUT "VCD_DATACOMP ");
+      if ((stream->dec_del_ind & VCD_INSTCOMP) != 0) VC(OUT "VCD_INSTCOMP ");
+      if ((stream->dec_del_ind & VCD_ADDRCOMP) != 0) VC(OUT "VCD_ADDRCOMP ");
+      VC(OUT "\n");
+    }
+
+  if (stream->dec_winstart != 0) { VC(OUT "VCDIFF window at offset:      %"Q"u\n", stream->dec_winstart); }
+
+  if (SRCORTGT (stream->dec_win_ind))
+    {
+      VC(OUT "VCDIFF copy window length:    %u\n", stream->dec_cpylen);
+      VC(OUT "VCDIFF copy window offset:    %"Q"u\n", stream->dec_cpyoff);
+    }
+
+  VC(OUT "VCDIFF delta encoding length: %u\n", stream->dec_enclen);
+  VC(OUT "VCDIFF target window length:  %u\n", stream->dec_tgtlen);
+  VC(OUT "VCDIFF data section length:   %u\n", stream->data_sect.size);
+  VC(OUT "VCDIFF inst section length:   %u\n", stream->inst_sect.size);
+  VC(OUT "VCDIFF addr section length:   %u\n", stream->addr_sect.size);
+
+  ret = 0;
+  if ((stream->flags & XD3_JUST_HDR) != 0)
+    {
+      /* Print a header -- finished! */
+      ret = PRINTHDR_SPECIAL;
+    }
+  else if ((stream->flags & XD3_SKIP_WINDOW) == 0)
+    {
+      ret = main_print_window (stream, vcout);
+    }
+
+#if XD3_POSIX
+  fclose (vcout);
+#elif XD3_STDIO
+  /* vcout is xfile->file itself here: flush it, but leave closing to the owner. */
+  fflush (vcout);
+#endif
+  return ret;
+}
+#endif /* VCDIFF_TOOLS */
+
+/******************************************************************************************
+ Input decompression, output recompression
+ ******************************************************************************************/
+
+#if EXTERNAL_COMPRESSION
+/* This is tricky POSIX-specific code with lots of fork(), pipe(), dup(), waitpid(), and
+ * exec() business.  Most of this code originated in PRCS1, which did automatic
+ * package-file decompression.  It works with both XD3_POSIX and XD3_STDIO file
+ * disciplines.
+ *
+ * To automatically detect compressed inputs requires a child process to reconstruct the
+ * input stream, which was advanced in order to detect compression, because it may not be
+ * seekable.  In other words, the main program reads part of the input stream, and if it
+ * detects a compressed input it then forks a pipe copier process, which copies the
+ * first-read block out of the main-program's memory, then streams the remaining
+ * compressed input into the input-decompression pipe.
+ */
+
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+/* Remember which pipe FD is which. */
+#define PIPE_READ_FD  0
+#define PIPE_WRITE_FD 1
+
+static pid_t ext_subprocs[2];
+static const char* ext_tmpfile = NULL;
+
+/* Like write(), but makes repeated calls to empty the buffer: sends all REMAIN bytes
+ * of EXIST_BUF to OUTFD.  Returns 0, or prints a message and returns the errno value. */
+static int
+main_pipe_write (int outfd, const uint8_t *exist_buf, usize_t remain)
+{
+  int ret = xd3_posix_io (outfd, (uint8_t*) exist_buf, remain, (xd3_posix_func*) &write, NULL);
+
+  if (ret != 0)
+    {
+      /* XPR is plain fprintf: the message needs its own trailing newline. */
+      XPR(NT "pipe write failed: %s\n", xd3_strerror (ret));
+    }
+  return ret;
+}
+
+/* A simple error-reporting waitpid interface: waits for child PID.  Returns 0 if it
+ * exited with status 0; otherwise prints the reason and returns the errno value of
+ * a failed waitpid(), or ECHILD for a signal or a non-zero exit status. */
+static int
+main_waitpid_check(pid_t pid)
+{
+  int status;
+
+  if (waitpid (pid, & status, 0) < 0)
+    {
+      int ret = get_errno ();
+      XPR(NT "compression subprocess: wait: %s\n", xd3_strerror (ret));
+      return ret;
+    }
+
+  if (WIFEXITED (status))
+    {
+      if (WEXITSTATUS (status) == 0) { return 0; }
+      XPR(NT "compression subprocess: exit %d\n", WEXITSTATUS (status));
+      return ECHILD;
+    }
+
+  XPR(NT "compression subprocess: signal %d\n",
+      WIFSIGNALED (status) ? WTERMSIG (status) : WSTOPSIG (status));
+  return ECHILD;
+}
+
+/* Wait for any existing child processes to check for abnormal exit.  Zero slots of
+ * ext_subprocs are skipped.  The first failure reported by main_waitpid_check() is
+ * returned at once, without waiting on the remaining slot; otherwise returns 0.
+ * NOTE(review): slots are not cleared here, so a second call waits on them again. */
+static int
+main_external_compression_finish (void)
+{
+  int slot;
+
+  for (slot = 0; slot < 2; slot += 1)
+    {
+      pid_t child = ext_subprocs[slot];
+      int   ret   = child ? main_waitpid_check (child) : 0;
+
+      if (ret != 0) { return ret; }
+    }
+
+  return 0;
+}
+
+/* This runs as a forked process of main_input_decompress_setup() to copy input to the
+ * decompression process.  First, the available input is copied out of the existing
+ * buffer, then the buffer is reused to continue reading from the compressed input
+ * file.  A read shorter than PIPE_BUFSIZE ends the copy.  Returns 0, or the error
+ * from main_pipe_write() / main_file_read(). */
+static int
+main_pipe_copier (uint8_t    *pipe_buf,
+                  usize_t     pipe_bufsize,
+                  usize_t     nread,
+                  main_file  *ifile,
+                  int         outfd)
+{
+  int ret;
+
+  for (;;)
+    {
+      if (nread > 0 && (ret = main_pipe_write (outfd, pipe_buf, nread)))
+        {
+          return ret;
+        }
+
+      if (nread < pipe_bufsize) { break; }
+
+      /* main_file_read() reports failure as a non-zero errno value, so test for
+       * non-zero; the old "< 0" test never fired and the stale buffer was re-sent. */
+      if ((ret = main_file_read (ifile, pipe_buf, pipe_bufsize, & nread, "pipe read failed")))
+        {
+          return ret;
+        }
+    }
+
+  return 0;
+}
+
+/* This function is called after we have read some amount of data from the input file and
+ * detected a compressed input.  Here we start a decompression subprocess by forking
+ * twice.  The first process runs the decompression command, the second process copies
+ * data to the input of the first.
+ *
+ * decomp         the external decompressor to run (decomp_cmdname / decomp_options)
+ * ifile          the input file; it is closed here and re-pointed at the decompressor's
+ *                output
+ * input_buf      destination for the first block of decompressed data
+ * input_bufsize  size of input_buf
+ * pipe_buf       the compressed bytes that were already read from ifile
+ * pipe_bufsize   capacity of pipe_buf
+ * pipe_avail     number of valid bytes in pipe_buf
+ * nread          receives the number of decompressed bytes placed in input_buf
+ *
+ * Returns the result of the first main_file_read() on the decompressed stream, or an
+ * errno value if creating the pipes, forking, or rearranging descriptors fails.  Both
+ * child process ids are recorded in ext_subprocs. */
+static int
+main_input_decompress_setup (const main_extcomp     *decomp,
+			     main_file              *ifile,
+			     uint8_t               *input_buf,
+			     usize_t                 input_bufsize,
+			     uint8_t               *pipe_buf,
+			     usize_t                 pipe_bufsize,
+			     usize_t                 pipe_avail,
+			     usize_t                *nread)
+{
+  int outpipefd[2], inpipefd[2];  /* The two pipes: input and output file descriptors. */
+  int input_fd = -1;              /* The resulting input_fd (output of decompression). */
+  pid_t decomp_id, copier_id;     /* The two subprocs. */
+  int ret;
+
+  outpipefd[0] = outpipefd[1] = -1;  /* -1 marks "not open" for pipe_cleanup. */
+  inpipefd[0]  = inpipefd[1]  = -1;
+
+  if (pipe (outpipefd) || pipe (inpipefd))
+    {
+      XPR(NT "pipe failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  if ((decomp_id = fork ()) < 0)
+    {
+      XPR(NT "fork failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  /* The first child runs the decompression process: */
+  if (decomp_id == 0)
+    {
+      /* Setup pipes: write to the outpipe, read from the inpipe. */
+      if (dup2 (outpipefd[PIPE_WRITE_FD], STDOUT_FILENO) < 0 ||
+	  dup2 (inpipefd[PIPE_READ_FD], STDIN_FILENO) < 0 ||
+	  close (outpipefd[PIPE_READ_FD]) ||
+	  close (outpipefd[PIPE_WRITE_FD]) ||
+	  close (inpipefd[PIPE_READ_FD]) ||
+	  close (inpipefd[PIPE_WRITE_FD]) ||
+	  execlp (decomp->decomp_cmdname, decomp->decomp_cmdname, decomp->decomp_options, NULL))
+	{
+	  XPR(NT "child process %s failed to execute: %s\n", decomp->decomp_cmdname, xd3_strerror (get_errno ()));
+	}
+
+      _exit (127);  /* Reached only when one of the steps above failed. */
+    }
+
+  ext_subprocs[0] = decomp_id;  /* Checked later by main_external_compression_finish(). */
+
+  if ((copier_id = fork ()) < 0)
+    {
+      XPR(NT "fork failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  /* The second child runs the copier process: */
+  if (copier_id == 0)
+    {
+      int exitval = 0;
+
+      if (close (inpipefd[PIPE_READ_FD]) ||
+	  main_pipe_copier (pipe_buf, pipe_bufsize, pipe_avail, ifile, inpipefd[PIPE_WRITE_FD]) ||
+	  close (inpipefd[PIPE_WRITE_FD]))
+	{
+	  XPR(NT "child copier process failed: %s\n", xd3_strerror (get_errno ()));
+	  exitval = 1;
+	}
+
+      _exit (exitval);  /* The copier child never returns into the main program. */
+    }
+
+  ext_subprocs[1] = copier_id;  /* Checked later by main_external_compression_finish(). */
+
+  /* The parent closes both pipes after duplicating the output of decompression. */
+  input_fd = dup (outpipefd[PIPE_READ_FD]);
+
+  if (input_fd < 0 ||
+      main_file_close (ifile) ||
+      close (outpipefd[PIPE_READ_FD]) ||
+      close (outpipefd[PIPE_WRITE_FD]) ||
+      close (inpipefd[PIPE_READ_FD]) ||
+      close (inpipefd[PIPE_WRITE_FD]))
+    {
+      XPR(NT "dup/close failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#if XD3_STDIO
+  /* Note: fdopen() acquires the fd, closes it when finished. */
+  if ((ifile->file = fdopen (input_fd, "r")) == NULL)
+    {
+      XPR(NT "fdopen failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#elif XD3_POSIX
+  ifile->file = input_fd;
+#endif
+
+  ifile->compressor = decomp;  /* Remember which external compressor produced the input. */
+
+  /* Now the input file is decompressed. */
+  return main_file_read (ifile, input_buf, input_bufsize, nread, "input decompression failed");
+
+ pipe_cleanup:
+  close (input_fd);  /* Still -1 unless the dup() above succeeded. */
+  close (outpipefd[PIPE_READ_FD]);
+  close (outpipefd[PIPE_WRITE_FD]);
+  close (inpipefd[PIPE_READ_FD]);
+  close (inpipefd[PIPE_WRITE_FD]);
+  return ret;
+}
+
+
+/* This routine is called when the first buffer of input data is read by the main program
+ * (unless input decompression is disabled by command-line option).  If it recognizes the
+ * magic number of a known input type it invokes decompression.
+ *
+ * Skips decompression if both the decompression type and the file type are flagged
+ * RD_NONEXTERNAL.
+ *
+ * Behaves exactly like main_file_read, otherwise: on success *nread holds the number of
+ * bytes placed in input_buf and 0 is returned; a failed read returns its error code.
+ *
+ * This function uses a separate buffer to read the first small block of input.  If a
+ * compressed input is detected, the separate buffer is passed to the pipe copier.  This
+ * avoids using the same size buffer in both cases. */
+static int
+main_decompress_input_check (main_file   *ifile,
+                            uint8_t    *input_buf,
+                            usize_t      input_size,
+                            usize_t     *nread)
+{
+  int i;
+  int ret;
+  uint8_t check_buf[PIPE_BUFSIZE];
+  usize_t  check_nread;
+
+  if ((ret = main_file_read (ifile, check_buf, min (input_size, PIPE_BUFSIZE), & check_nread, "input read failed")))
+    {
+      return ret;
+    }
+
+  for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1)
+    {
+      const main_extcomp *decomp = & extcomp_types[i];
+
+      if ((check_nread > decomp->magic_size) &&
+          /* The following expr skips decompression if we are trying to read a VCDIFF
+           * input and that is the magic number. */
+          !((decomp->flags & RD_NONEXTERNAL) && (ifile->flags & RD_NONEXTERNAL)) &&
+          memcmp (check_buf, decomp->magic, decomp->magic_size) == 0)
+        {
+          if (! option_quiet)
+            {
+              XPR(NT "%s | %s %s\n",
+                 ifile->filename,
+                 decomp->decomp_cmdname,
+                 decomp->decomp_options);
+            }
+
+          return main_input_decompress_setup (decomp, ifile,
+                                              input_buf, input_size,
+                                              check_buf, PIPE_BUFSIZE,
+                                              check_nread, nread);
+        }
+    }
+
+  /* Now read the rest of the input block. */
+  (*nread) = 0;
+
+  /* A full check buffer means there may be more input.  check_nread can only equal
+   * PIPE_BUFSIZE when input_size >= PIPE_BUFSIZE, so the subtraction cannot wrap.  The
+   * result of this read used to be discarded; a failure is now reported to the caller. */
+  if (check_nread == PIPE_BUFSIZE &&
+      (ret = main_file_read (ifile, input_buf + PIPE_BUFSIZE, input_size - PIPE_BUFSIZE, nread, "input read failed")))
+    {
+      return ret;
+    }
+
+  memcpy (input_buf, check_buf, check_nread);
+
+  (*nread) += check_nread;
+
+  return 0;
+}
+
+/* This is called when the source file needs to be decompressed.  We fork/exec a
+ * decompression command with the proper input and output to a temporary file.
+ *
+ * sfile   the open, compressed source file; sfile->compressor names the command to run.
+ *         On success sfile is reopened on the decompressed temporary file.
+ * source  receives the size of the decompressed file in source->size.
+ *
+ * The temporary file is created with mkstemp() under $TMPDIR (default "/tmp") and its
+ * name is recorded in ext_tmpfile.  Returns 0 on success.  On failure an error code is
+ * returned, the temporary file (if it was created) is removed, and ext_tmpfile is reset
+ * to NULL. */
+static int
+main_decompress_source (main_file *sfile, xd3_source *source)
+{
+  const main_extcomp *decomp = sfile->compressor;
+  pid_t decomp_id;  /* One subproc. */
+  int   input_fd  = -1;
+  int   output_fd = -1;
+  int   tmp_created = 0;  /* Set once mkstemp() has created the file on disk. */
+  int   ret;
+  char *tmpname = NULL;
+  char *tmpdir  = getenv ("TMPDIR");
+  static const char tmpl[] = "/xd3src.XXXXXX";
+
+  /* Make a template for mkstemp() */
+  if (tmpdir == NULL) { tmpdir = "/tmp"; }
+  if ((tmpname = main_malloc (strlen (tmpdir) + sizeof (tmpl) + 1)) == NULL) { return ENOMEM; }
+  sprintf (tmpname, "%s%s", tmpdir, tmpl);
+
+  XD3_ASSERT (ext_tmpfile == NULL);
+  ext_tmpfile = tmpname;
+
+  /* Open the output FD. */
+  if ((output_fd = mkstemp (tmpname)) < 0)
+    {
+      XPR(NT "mkstemp failed: %s: %s\n", tmpname, xd3_strerror (ret = get_errno ()));
+      goto cleanup;
+    }
+
+  tmp_created = 1;
+
+  /* Copy the input FD, reset file position. */
+  XD3_ASSERT (main_file_isopen (sfile));
+#if XD3_STDIO
+  if ((input_fd = dup (fileno (sfile->file))) < 0)
+    {
+      XPR(NT "dup failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto cleanup;
+    }
+  main_file_close (sfile);
+  sfile->file = NULL;
+#elif XD3_POSIX
+  input_fd = sfile->file;
+  sfile->file = -1;
+#endif
+
+  /* lseek() takes (fd, offset, whence).  The arguments used to be passed in the order
+   * (fd, SEEK_SET, 0), which only worked where SEEK_SET happens to be 0. */
+  if (lseek (input_fd, 0, SEEK_SET) != 0)
+    {
+      XPR(NT "lseek failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto cleanup;
+    }
+
+  if ((decomp_id = fork ()) < 0)
+    {
+      XPR(NT "fork failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto cleanup;
+    }
+
+  /* The child runs the decompression process: */
+  if (decomp_id == 0)
+    {
+      /* Read from the compressed source, write to the temporary file. */
+      if (dup2 (input_fd, STDIN_FILENO) < 0 ||
+          dup2 (output_fd, STDOUT_FILENO) < 0 ||
+          execlp (decomp->decomp_cmdname, decomp->decomp_cmdname, decomp->decomp_options, NULL))
+        {
+          XPR(NT "child process %s failed to execute: %s\n",
+                   decomp->decomp_cmdname, xd3_strerror (get_errno ()));
+        }
+
+      _exit (127);
+    }
+
+  /* The parent no longer needs either descriptor. */
+  close (input_fd);
+  close (output_fd);
+  input_fd  = -1;
+  output_fd = -1;
+
+  /* Then wait for completion. */
+  if ((ret = main_waitpid_check (decomp_id)))
+    {
+      goto cleanup;
+    }
+
+  /* Open/stat the decompressed source file. */
+  if ((ret = main_file_open (sfile, tmpname, XO_READ))) { goto cleanup; }
+  if ((ret = main_file_stat (sfile, & source->size, 1))) { goto cleanup; }
+  return 0;
+
+ cleanup:
+  if (input_fd >= 0)  { close (input_fd); }
+  if (output_fd >= 0) { close (output_fd); }
+  /* ext_tmpfile is cleared below, so nothing else can remove the file later: do it
+   * here. */
+  if (tmp_created) { unlink (tmpname); }
+  if (tmpname) { free (tmpname); }
+  ext_tmpfile = NULL;
+  return ret;
+}
+
+/* Initiate re-compression of the output stream.  This is easier than input decompression
+ * because we know beforehand that the stream will be compressed, whereas the input has
+ * already been read when we decide it should be decompressed.  Thus, it only requires one
+ * subprocess and one pipe.
+ *
+ * ofile  the already-open output file; ofile->compressor names the command to run.  The
+ *        child process inherits the original output as its stdout, and ofile is
+ *        re-pointed at the write end of the pipe feeding that child.
+ *
+ * Returns 0 on success or an errno value if the pipe, fork, dup, close or fdopen step
+ * fails.  The child's process id is recorded in ext_subprocs[0]. */
+static int
+main_recompress_output (main_file *ofile)
+{
+  pid_t recomp_id;  /* One subproc. */
+  int   pipefd[2];  /* One pipe. */
+  int   output_fd = -1;
+  int   ret;
+  const main_extcomp *recomp = ofile->compressor;
+
+  pipefd[0] = pipefd[1] = -1;  /* -1 marks "not open" for pipe_cleanup. */
+
+  if (pipe (pipefd))
+    {
+      XPR(NT "pipe failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  if ((recomp_id = fork ()) < 0)
+    {
+      XPR(NT "fork failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  /* The child runs the recompression process: */
+  if (recomp_id == 0)
+    {
+      /* Setup pipes: write to the output file, read from the pipe. */
+      if (dup2 (XFNO (ofile), STDOUT_FILENO) < 0 ||
+	  dup2 (pipefd[PIPE_READ_FD], STDIN_FILENO) < 0 ||
+	  close (pipefd[PIPE_READ_FD]) ||
+	  close (pipefd[PIPE_WRITE_FD]) ||
+	  execlp (recomp->recomp_cmdname, recomp->recomp_cmdname, recomp->recomp_options, NULL))
+	{
+	  XPR(NT "child process %s failed to execute: %s\n", recomp->recomp_cmdname, xd3_strerror (get_errno ()));
+	}
+
+      _exit (127);  /* Reached only when one of the steps above failed. */
+    }
+
+  ext_subprocs[0] = recomp_id;  /* Checked later by main_external_compression_finish(). */
+
+  /* The parent closes both pipes after duplicating the output-fd for writing to the
+   * compression pipe. */
+  output_fd = dup (pipefd[PIPE_WRITE_FD]);
+
+  if (output_fd < 0 ||
+      main_file_close (ofile) ||
+      close (pipefd[PIPE_READ_FD]) ||
+      close (pipefd[PIPE_WRITE_FD]))
+    {
+      XPR(NT "close failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#if XD3_STDIO
+  /* Note: fdopen() acquires the fd, closes it when finished. */
+  if ((ofile->file = fdopen (output_fd, "w")) == NULL)
+    {
+      XPR(NT "fdopen failed: %s\n", xd3_strerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#elif XD3_POSIX
+  ofile->file = output_fd;
+#endif
+
+  /* Now the output file will be compressed. */
+  return 0;
+
+ pipe_cleanup:
+  close (output_fd);  /* Still -1 unless the dup() above succeeded. */
+  close (pipefd[PIPE_READ_FD]);
+  close (pipefd[PIPE_WRITE_FD]);
+  return ret;
+}
+#endif /* EXTERNAL_COMPRESSION */
+
+/* Look up the external compressor whose ident string equals `ident' (the string carried
+ * in the application header).  Returns the matching extcomp_types entry, or NULL when no
+ * entry matches. */
+static const main_extcomp*
+main_ident_compressor (const char *ident)
+{
+  const main_extcomp *entry = extcomp_types;
+  const main_extcomp *stop  = extcomp_types + SIZEOF_ARRAY (extcomp_types);
+
+  while (entry != stop)
+    {
+      if (strcmp (entry->ident, ident) == 0)
+        {
+          return entry;
+        }
+
+      entry += 1;
+    }
+
+  return NULL;
+}
+
+/* Resolve `ident' to the main_extcomp record that should be used for it.  Returns NULL,
+ * after printing a warning unless option_quiet is set, when the identifier is unknown or
+ * when external compression support is not compiled in. */
+static const main_extcomp*
+main_get_compressor (const char *ident)
+{
+  const main_extcomp *found = main_ident_compressor (ident);
+
+  if (found == NULL)
+    {
+      if (! option_quiet)
+        {
+          XPR(NT "warning: cannot recompress output: "
+                   "unrecognized external compression ID: %s\n", ident);
+        }
+
+      return NULL;
+    }
+
+  if (EXTERNAL_COMPRESSION)
+    {
+      return found;
+    }
+
+  /* The compressor is known but this build cannot run it. */
+  if (! option_quiet)
+    {
+      XPR(NT "warning: external support not compiled: "
+               "original input was compressed: %s\n", found->recomp_cmdname);
+    }
+
+  return NULL;
+}
+
+/******************************************************************************************
+ APPLICATION HEADER
+ ******************************************************************************************/
+
+#if XD3_ENCODER
+/* Reduce a file name to the form stored in the application header: NULL maps to "", the
+ * three standard stream devices map to "-", and any other name maps to the part after
+ * its last '/' (or to the whole name when it contains no '/').  The result is either a
+ * string literal or a pointer into x; nothing is allocated. */
+static const char*
+main_apphead_string (const char* x)
+{
+  static const char *const std_devices[] = { "/dev/stdin", "/dev/stdout", "/dev/stderr" };
+  const char *last_slash;
+  unsigned int k;
+
+  if (x == NULL)
+    {
+      return "";
+    }
+
+  for (k = 0; k < sizeof (std_devices) / sizeof (std_devices[0]); k += 1)
+    {
+      if (strcmp (x, std_devices[k]) == 0)
+        {
+          return "-";
+        }
+    }
+
+  last_slash = strrchr (x, '/');
+
+  if (last_slash != NULL)
+    {
+      return last_slash + 1;
+    }
+
+  return x;
+}
+
+/* Install the application header on the encoder stream.  Does nothing when a header was
+ * already chosen (appheader_used) or when the user disabled it (option_use_appheader).
+ * A user-supplied header (option_appheader) is used verbatim; otherwise the default
+ * "input-name/input-compressor" is formatted, extended with
+ * "/source-name/source-compressor" when a source file name is present.  Returns 0, or
+ * ENOMEM if the header buffer cannot be allocated. */
+static int
+main_set_appheader (xd3_stream *stream, main_file *input, main_file *sfile)
+{
+  if (appheader_used || ! option_use_appheader)
+    {
+      return 0;
+    }
+
+  if (! option_appheader)
+    {
+      int have_source = (sfile->filename != NULL);
+      const char *in_name  = main_apphead_string (input->filename);
+      const char *in_comp  = (input->compressor == NULL) ? "" : input->compressor->ident;
+      const char *src_name = "";
+      const char *src_comp = "";
+      /* Two extra bytes per pair: one '/' separator plus either the next '/' or the
+       * terminating NUL. */
+      int total = strlen (in_name) + strlen (in_comp) + 2;
+
+      if (have_source)
+        {
+          src_name = main_apphead_string (sfile->filename);
+          src_comp = (sfile->compressor == NULL) ? "" : sfile->compressor->ident;
+          total += strlen (src_name) + strlen (src_comp) + 2;
+        }
+
+      appheader_used = main_malloc (total);
+
+      if (appheader_used == NULL)
+        {
+          return ENOMEM;
+        }
+
+      if (have_source)
+        {
+          sprintf ((char*)appheader_used, "%s/%s/%s/%s", in_name, in_comp, src_name, src_comp);
+        }
+      else
+        {
+          sprintf ((char*)appheader_used, "%s/%s", in_name, in_comp);
+        }
+    }
+  else
+    {
+      appheader_used = option_appheader;
+    }
+
+  xd3_set_appheader (stream, appheader_used, strlen ((char*)appheader_used));
+
+  return 0;
+}
+#endif
+
+/* Apply one "name/compressor" pair taken from the application header to `file'.
+ * parsed[0] is the suggested file name and parsed[1] the compressor ident.  Values the
+ * file already has are never overridden.  `output' is non-zero for the output file, in
+ * which case option_stdout suppresses the default name.  `type' is only used in the
+ * informational message. */
+static void
+main_get_appheader_params (main_file *file, char **parsed, int output, const char *type)
+{
+  int stdout_forced = (output && option_stdout);
+
+  /* Adopt the default file name unless one was given, stdout was requested, or the
+   * header names "-". */
+  if (file->filename == NULL && ! stdout_forced && strcmp (parsed[0], "-") != 0)
+    {
+      file->filename = parsed[0];
+
+      if (! option_quiet)
+        {
+          XPR(NT "using default %s filename: %s\n", type, file->filename);
+        }
+    }
+
+  /* Only record the compressor here; de/recompression is started later. */
+  if (file->compressor == NULL && parsed[1][0] != 0)
+    {
+      file->compressor = main_get_compressor (parsed[1]);
+    }
+}
+
+/* Read the application header from the decoder stream and use it to fill in defaults for
+ * the output file and the source file.  The header is split in place at '/' characters.
+ * A header with two fields describes the output (name, compressor); one with four fields
+ * describes the output followed by the source.  Any other field count is ignored.
+ *
+ * Does nothing when option_use_appheader is off or when xd3_get_appheader() reports that
+ * no header is available yet.  Once a header has been fetched, option_use_appheader is
+ * cleared so the header is processed only once. */
+static void
+main_get_appheader (xd3_stream *stream, main_file *output, main_file *sfile)
+{
+  uint8_t *apphead;
+  usize_t appheadsz;
+  int ret;
+
+  /* The user may disable the application header.  Once the appheader has been read,
+   * this also prevents reading it again. */
+  if (! option_use_appheader) { return; }
+
+  ret = xd3_get_appheader (stream, & apphead, & appheadsz);
+
+  /* Ignore failure, it only means we haven't received a header yet. */
+  if (ret != 0) { return; }
+
+  if (appheadsz > 0)
+    {
+      char *start = (char*)apphead;
+      char *slash;
+      int   place = 0;
+      char *parsed[4];
+      const int max_fields = (int) (sizeof (parsed) / sizeof (parsed[0]));
+
+      memset (parsed, 0, sizeof (parsed));
+
+      /* The header comes from the delta input and may contain any number of '/'
+       * characters.  Every field is counted but only the first max_fields are stored, so
+       * a header with too many fields can no longer write past the end of parsed[]. */
+      while ((slash = strchr (start, '/')) != NULL)
+        {
+          *slash = 0;
+          if (place < max_fields) { parsed[place] = start; }
+          place += 1;
+          start = slash + 1;
+        }
+
+      if (place < max_fields) { parsed[place] = start; }
+      place += 1;
+
+      /* First take the output parameters. */
+      if (place == 2 || place == 4)
+        {
+          main_get_appheader_params (output, parsed, 1, "output");
+        }
+
+      /* Then take the source parameters. */
+      if (place == 4)
+        {
+          main_get_appheader_params (sfile, parsed+2, 0, "source");
+        }
+    }
+
+  option_use_appheader = 0;
+  return;
+}
+
+/******************************************************************************************
+ Main I/O routines
+ ******************************************************************************************/
+
+/* Read the next block of primary input into buf, exactly like main_file_read().  When
+ * EXTERNAL_COMPRESSION is compiled in, input decompression is enabled and this is the
+ * first read of the file (RD_FIRST still set), the flag is cleared and the read goes
+ * through main_decompress_input_check(), which searches for known magic numbers. */
+static int
+main_read_primary_input (main_file   *ifile,
+                         uint8_t    *buf,
+                         usize_t      size,
+                         usize_t     *nread)
+{
+#if EXTERNAL_COMPRESSION
+  if (! option_decompress_inputs || (ifile->flags & RD_FIRST) == 0)
+    {
+      return main_file_read (ifile, buf, size, nread, "input read failed");
+    }
+
+  ifile->flags &= ~RD_FIRST;
+
+  return main_decompress_input_check (ifile, buf, size, nread);
+#else
+  return main_file_read (ifile, buf, size, nread, "input read failed");
+#endif
+}
+
+/* Write the stream's pending output (next_out / avail_out) to ofile, if there is any.
+ * Used for both the encode and decode commands; the VCDIFF tools use main_print_func()
+ * instead.  Returns 0 when there is nothing to write, otherwise the result of
+ * main_file_write(). */
+static int
+main_write_output (xd3_stream* stream, main_file *ofile)
+{
+  if (! (stream->avail_out > 0))
+    {
+      return 0;
+    }
+
+  return main_file_write (ofile, stream->next_out, stream->avail_out, "write failed");
+}
+
+/* Open the main output.  Without a file name the standard output is used; with one, the
+ * file is opened for writing, refusing to overwrite an existing file unless option_force
+ * is set (EEXIST).  If the output has a compressor and recompression is enabled, the
+ * recompression subprocess is started as well.  Returns 0 on success or the error code
+ * of the failing step. */
+static int
+main_open_output (xd3_stream *stream, main_file *ofile)
+{
+  int ret;
+
+  if (ofile->filename != NULL)
+    {
+      /* Stat the file to check for overwrite. */
+      if (option_force == 0 && main_file_exists (ofile))
+        {
+          XPR(NT "to overwrite output file specify -f: %s\n", ofile->filename);
+          return EEXIST;
+        }
+
+      ret = main_file_open (ofile, ofile->filename, XO_WRITE);
+
+      if (ret != 0)
+        {
+          return ret;
+        }
+
+      if (option_verbose > 1) { XPR(NT "open output: %s\n", ofile->filename); }
+    }
+  else
+    {
+      XSTDOUT_XF (ofile);
+
+      if (option_verbose > 1) { XPR(NT "using standard output: %s\n", ofile->filename); }
+    }
+
+#if EXTERNAL_COMPRESSION
+  /* Do output recompression. */
+  if (ofile->compressor != NULL && option_recompress_outputs == 1)
+    {
+      if (! option_quiet)
+        {
+          XPR(NT "%s %s | %s\n",
+             ofile->compressor->recomp_cmdname,
+             ofile->compressor->recomp_options,
+             ofile->filename);
+        }
+
+      ret = main_recompress_output (ofile);
+
+      if (ret != 0)
+        {
+          return ret;
+        }
+    }
+#endif
+
+  return 0;
+}
+
+/* This is called at different times for encoding and decoding.  The encoder calls it
+ * immediately, the decoder delays until the application header is received.
+ *
+ * Steps performed:
+ *   1. Open and stat the source file (skipped when allow_fake_source is set, in which
+ *      case the size is reported as UINT64_MAX).
+ *   2. Fill in the xd3_source fields and allocate the block cache: lru_size blocks of
+ *      source->blksize bytes each, all initially on the lru_free list.
+ *   3. With EXTERNAL_COMPRESSION and option_decompress_inputs: when encoding, read the
+ *      first block and compare it against the known magic numbers; if the source has a
+ *      compressor (detected here, or already set on sfile), decompress it through
+ *      main_decompress_source().
+ *   4. Hand the source to the stream with xd3_set_source().
+ *
+ * Returns 0 on success; ENOMEM, EFBIG, an error from the file or decompression helpers,
+ * or EXIT_FAILURE when xd3_set_source() fails. */
+static int
+main_set_source (xd3_stream *stream, int cmd, main_file *sfile, xd3_source *source)
+{
+  int ret, i;
+
+  /* Open it, check for seekability, set required xd3_source fields. */
+  if (allow_fake_source)
+    {
+      sfile->mode = XO_READ;
+      sfile->realname = sfile->filename;
+      sfile->nread = 0;
+      source->size = UINT64_MAX;
+    }
+  else if ((ret = main_file_open (sfile, sfile->filename, XO_READ)) ||
+	   (ret = main_file_stat (sfile, & source->size, 1)))
+    {
+      return ret;
+    }
+
+  source->name     = sfile->filename;
+  source->ioh      = sfile;  /* main_getblk_func() casts this back to main_file*. */
+  source->curblkno = (xoff_t) -1;
+  source->curblk   = NULL;
+
+  /* Source block LRU init. */
+  main_blklru_list_init (& lru_list);
+  main_blklru_list_init (& lru_free);
+
+  option_srcwinsz = min(source->size, (xoff_t) option_srcwinsz);  /* Never larger than the source itself. */
+
+  if (option_verbose > 1) { XPR(NT "source window size: %u\n", option_srcwinsz); }
+  if (option_verbose > 1) { XPR(NT "source block size: %u\n", source->blksize); }
+  
+  lru_size = (option_srcwinsz / source->blksize) + 1;  /* Always at least one block. */
+
+  XD3_ASSERT(lru_size <= 128);  /* TODO: fix performance here */
+
+  if ((lru = main_malloc (sizeof (main_blklru) * lru_size)) == NULL)
+    {
+      return ENOMEM;
+    }
+
+  for (i = 0; i < lru_size; i += 1)
+    {
+      lru[i].blkno = (xoff_t) -1;  /* (xoff_t) -1 marks a slot that holds no block yet. */
+
+      if ((lru[i].blk = main_malloc (source->blksize)) == NULL)
+	{
+	  return ENOMEM;
+	}
+
+      main_blklru_list_push_back (& lru_free, & lru[i]);
+    }
+
+#if EXTERNAL_COMPRESSION
+  if (option_decompress_inputs)
+    {
+      if (IS_ENCODE (cmd))
+	{
+	  usize_t nread;
+
+	  source->curblk = lru[0].blk;
+
+	  /* If encoding, read the first block now to check for decompression. */
+	  if ((ret = main_file_read (sfile, (uint8_t*) source->curblk, source->blksize, & nread, "source read failed")))
+	    {
+	      return ret;
+	    }
+
+	  /* Check known magic numbers. */
+	  for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1)
+	    {
+	      const main_extcomp *decomp = & extcomp_types[i];
+
+	      if ((nread > decomp->magic_size) && memcmp (source->curblk, decomp->magic, decomp->magic_size) == 0)
+		{
+		  sfile->compressor = decomp;
+		  break;
+		}
+	    }
+
+	  /* If no decompression, the current buffer is now a valid source->curblock. */
+	  if (sfile->compressor == NULL)
+	    {
+	      main_blklru_list_remove (& lru[0]);  /* Move slot 0 from the free list to the in-use list. */
+	      main_blklru_list_push_back (& lru_list, & lru[0]);
+
+	      lru[0].blkno     = 0;
+	      source->curblkno = 0;
+	      source->onblk    = nread;
+
+	      if (option_verbose > 1)
+		{
+		  XPR(NT "source block 0 read (not compressed)\n");
+		}
+	    }
+	}
+
+      /* In either the encoder or decoder, start decompression. */
+      if (sfile->compressor)
+	{
+	  xoff_t osize = source->size;  /* Size before decompression, kept for the report below. */
+
+	  if (osize > XD3_NODECOMPRESSSIZE)
+	    {
+	      XPR(NT "source file too large for external decompression: %s: %"Q"u\n",
+		       sfile->filename, osize);
+	      return EFBIG;
+	    }
+
+	  if ((ret = main_decompress_source (sfile, source)))
+	    {
+	      return ret;
+	    }
+
+	  if (! option_quiet)
+	    {
+	      char s1[32], s2[32];
+	      XPR(NT "%s | %s %s => %s %.1f%% [ %s , %s ]\n",
+		 sfile->filename,
+		 sfile->compressor->decomp_cmdname,
+		 sfile->compressor->decomp_options,
+		 sfile->realname,
+		 100.0 * source->size / osize,
+		 main_format_bcnt (osize, s1),
+		 main_format_bcnt (source->size, s2));
+	    }
+	}
+    }
+#endif
+
+  if (option_verbose > 1) { XPR(NT "source file: %s: %"Q"u bytes\n", sfile->realname, source->size); }
+
+  if ((ret = xd3_set_source (stream, source)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (stream, ret));
+      return EXIT_FAILURE;
+    }
+
+  return 0;
+}
+
+/******************************************************************************************
+ Source routines
+ ******************************************************************************************/
+
+/* This is the callback for reading a block of source.  This function is blocking and it
+ * implements a small LRU.
+ *
+ * Note that it is possible for main_input() to handle getblk requests in a non-blocking
+ * manner.  If the callback is NULL then the caller of xd3_*_input() must handle the
+ * XD3_GETSRCBLK return value and fill the source in the same way.  See xd3_getblk for
+ * details.  To see an example of non-blocking getblk, see xdelta3-test.h.
+ *
+ * On success source->curblk, source->curblkno and source->onblk describe block `blkno'
+ * and 0 is returned.  A cached block is returned without touching the file; otherwise a
+ * slot is taken from the free list (or an in-use slot is evicted) and the block is read
+ * from the file at offset blkno * source->blksize.  Returns the error of the failing
+ * seek or read, or EINVAL when the number of bytes read differs from what
+ * xd3_bytes_on_srcblk() predicted. */
+static int
+main_getblk_func (xd3_stream *stream,
+		  xd3_source *source,
+		  xoff_t      blkno)
+{
+  xoff_t      pos   = blkno * source->blksize;
+  main_file   *sfile = (main_file*) source->ioh;
+  main_blklru *blru  = NULL;
+  usize_t      onblk = xd3_bytes_on_srcblk (source, blkno);
+  usize_t      nread;
+  int         ret;
+  int         i;
+
+  if (allow_fake_source)
+    {
+      source->curblkno = blkno;
+      source->onblk    = onblk;
+      source->curblk   = lru[0].blk;  /* No file I/O: every request is answered with slot 0's buffer. */
+      return 0;
+    }
+
+  if (do_not_lru)
+    {
+      /* Direct lookup assumes sequential scan w/o skipping blocks. */
+      int idx = blkno % lru_size;
+      if (lru[idx].blkno == blkno)
+	{
+	  source->curblkno = blkno;
+	  source->onblk    = onblk;
+	  source->curblk   = lru[idx].blk;
+	  lru_hits += 1;
+	  return 0;
+	}
+      XD3_ASSERT (lru[idx].blkno == -1LL ||
+		  lru[idx].blkno == blkno - lru_size);
+    }
+  else
+    {
+      /* Sequential search through LRU. */
+      for (i = 0; i < lru_size; i += 1)
+	{
+	  if (lru[i].blkno == blkno)
+	    {
+	      main_blklru_list_remove (& lru[i]);  /* Re-queue at the back: most recently used. */
+	      main_blklru_list_push_back (& lru_list, & lru[i]);
+
+	      source->curblkno = blkno;
+	      source->onblk    = onblk;
+	      source->curblk   = lru[i].blk;
+	      lru_hits += 1;
+	      return 0;
+	    }
+	}
+    }
+
+  if (! main_blklru_list_empty (& lru_free))
+    {
+      blru = main_blklru_list_pop_front (& lru_free);
+    }
+  else if (! main_blklru_list_empty (& lru_list))
+    {
+      if (do_not_lru) {
+	blru = & lru[blkno % lru_size];
+	main_blklru_list_remove(blru);
+      } else {
+	blru = main_blklru_list_pop_front (& lru_list);
+      }
+      lru_misses += 1;  /* Counted only when an in-use slot had to be evicted. */
+    }
+
+  lru_filled += 1;
+
+  if ((ret = main_file_seek (sfile, pos)))
+    {
+      return ret;
+    }
+
+  if ((ret = main_file_read (sfile, (uint8_t*) blru->blk, source->blksize,
+			     & nread, "source read failed")))
+    {
+      return ret;
+    }
+
+  if (nread != onblk)
+    {
+      XPR(NT "source file size change: %s\n", sfile->filename);
+      return EINVAL;
+    }
+
+  main_blklru_list_push_back (& lru_list, blru);
+
+  if (option_verbose > 1)
+    {
+      if (blru->blkno != -1LL)  /* The slot still carries the number of the block being replaced. */
+	{
+	  XPR(NT "source block %"Q"u ejects %"Q"u (lru_hits=%u, lru_misses=%u, lru_filled=%u)\n",
+	      blkno, blru->blkno, lru_hits, lru_misses, lru_filled);
+	}
+      else
+	{
+	  XPR(NT "source block %"Q"u read (lru_hits=%u, lru_misses=%u, lru_filled=%u)\n",
+					  blkno, lru_hits, lru_misses, lru_filled);
+	}
+    }
+
+  blru->blkno      = blkno;
+  source->curblk   = blru->blk;
+  source->curblkno = blkno;
+  source->onblk    = onblk;
+
+  return 0;
+}
+
+/******************************************************************************************
+ Main routines
+ ******************************************************************************************/
+
+/* This is a generic input function.  It calls the xd3_encode_input or xd3_decode_input
+ * functions and makes calls to the various input handling routines above, which
+ * coordinate external decompression.
+ *
+ * TODO config: Still need options for the at least: smatch config, memsize, sprevsz,
+ * XD3_SEC_* flags, greedy/1.5
+ */
+static int
+main_input (xd3_cmd     cmd,
+	    main_file   *ifile,
+	    main_file   *ofile,
+	    main_file   *sfile)
+{
+  int        ret;
+  xd3_stream stream;
+  usize_t    nread;
+  int        stream_flags = 0;
+  xd3_config config;
+  xd3_source source;
+  xoff_t     last_total_in = 0;
+  xoff_t     last_total_out = 0;
+  long       start_time;
+
+  int (*input_func) (xd3_stream*);
+  int (*output_func) (xd3_stream*, main_file *);
+
+  memset (& source, 0, sizeof (source));
+  memset (& config, 0, sizeof (config));
+
+  config.alloc = main_alloc;
+  config.freef = main_free1;
+  config.sec_data.ngroups = 1;
+  config.sec_addr.ngroups = 1;
+  config.sec_inst.ngroups = 1;
+
+  /* main_input setup. */
+  switch ((int) cmd)
+    {
+#if VCDIFF_TOOLS
+           if (1) { case CMD_PRINTHDR:   stream_flags = XD3_JUST_HDR; }
+      else if (1) { case CMD_PRINTHDRS:  stream_flags = XD3_SKIP_WINDOW; }
+      else        { case CMD_PRINTDELTA: stream_flags = XD3_SKIP_EMIT; }
+      ifile->flags |= RD_NONEXTERNAL;
+      input_func    = xd3_decode_input;
+      output_func   = main_print_func;
+      stream_flags |= XD3_ADLER32_NOVER;
+      break;
+#endif
+#if XD3_ENCODER
+    case CMD_ENCODE:
+      input_func  = xd3_encode_input;
+      output_func = main_write_output;
+
+      if (option_use_checksum) { stream_flags |= XD3_ADLER32; }
+      if (option_use_secondary)
+	{
+	  /* The default secondary compressor is DJW, if it's compiled, being used, etc. */
+	  if (option_secondary == NULL)
+	    {
+	      if (SECONDARY_DJW) { stream_flags |= XD3_SEC_DJW; }
+	    }
+	  else
+	    {
+	      if (strcmp (option_secondary, "fgk") == 0 && SECONDARY_FGK)
+		{
+		  stream_flags |= XD3_SEC_FGK;
+		}
+	      else if (strcmp (option_secondary, "djw") == 0 && SECONDARY_DJW)
+		{
+		  stream_flags |= XD3_SEC_DJW;
+		}
+	      else
+		{
+		  XPR(NT "unrecognized secondary compressor type: %s\n", option_secondary);
+		  return EXIT_FAILURE;
+		}
+	    }
+	}
+      if (option_no_compress)      { stream_flags |= XD3_NOCOMPRESS; }
+      if (option_use_altcodetable) { stream_flags |= XD3_ALT_CODE_TABLE; }
+      if (option_smatch_config)
+	{
+	  char *s = option_smatch_config, *e;
+	  int values[XD3_SOFTCFG_VARCNT];
+	  int got;
+
+	  config.smatch_cfg = XD3_SMATCH_SOFT;
+
+	  for (got = 0; got < XD3_SOFTCFG_VARCNT; got += 1, s = e + 1)
+	    {
+	      values[got] = strtol (s, &e, 10);
+
+	      if ((values[got] < 0) ||
+		  (e == s) ||
+		  (got < XD3_SOFTCFG_VARCNT-1 && *e == 0) ||
+		  (got == XD3_SOFTCFG_VARCNT-1 && *e != 0))
+		{
+		  XPR(NT "invalid string match specifier (-C)\n");
+		  return EXIT_FAILURE;
+		}
+	    }
+
+	  config.large_look    = values[0];
+	  config.large_step    = values[1];
+	  config.small_look    = values[2];
+	  config.small_chain   = values[3];
+	  config.small_lchain  = values[4];
+	  config.ssmatch       = values[5];
+	  config.try_lazy      = values[6];
+	  config.max_lazy      = values[7];
+	  config.long_enough   = values[8];
+	  config.promote       = values[9];
+	  config.srcwin_size   = values[10];
+	  config.srcwin_maxsz  = values[11];
+	}
+      else if (option_level < 5) { config.smatch_cfg = XD3_SMATCH_FAST; }
+      else                       { config.smatch_cfg = XD3_SMATCH_SLOW; }
+      break;
+#endif
+    case CMD_DECODE:
+      if (option_use_checksum == 0) { stream_flags |= XD3_ADLER32_NOVER; }
+      stream_flags  = 0;
+      ifile->flags |= RD_NONEXTERNAL;
+      input_func    = xd3_decode_input;
+      output_func   = main_write_output;
+      break;
+    default:
+      XPR(NT "internal error\n");
+      return EXIT_FAILURE;
+    }
+
+  start_time = get_millisecs_now ();
+
+  /* allocate an input buffer.  min(file_size, option_winsize) */
+  {
+    xoff_t input_size = 0;
+    config.winsize = option_winsize;
+    if (main_file_stat (ifile, & input_size, 0) == 0)
+      {
+	config.winsize = min (input_size, (xoff_t) option_winsize);
+      }
+    config.winsize = xd3_round_blksize (config.winsize, MIN_BUFSIZE);
+    config.winsize = max (config.winsize, MIN_BUFSIZE);
+  }
+  {
+    /* Source blocksize is not user-settable, only option_srcwinsz is,
+     * which determines the number of blocks. */
+    source.blksize = XD3_DEFAULT_SRCBLKSZ;
+    option_srcwinsz = xd3_round_blksize(option_srcwinsz, MIN_BUFSIZE);
+    option_srcwinsz = max(option_srcwinsz, MIN_BUFSIZE);
+    config.srcwin_maxsz = option_srcwinsz;
+  }
+
+  if (option_verbose > 1) { XPR(NT "input buffer size: %u\n", config.winsize); }
+  
+  if ((main_bdata = main_malloc (config.winsize)) == NULL)
+    {
+      return EXIT_FAILURE;
+    }
+
+  config.getblk = main_getblk_func;
+  config.flags  = stream_flags;
+
+  if ((ret = xd3_config_stream (& stream, & config)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+      return EXIT_FAILURE;
+    }
+
+  if (IS_ENCODE (cmd))
+    {
+      /* When encoding, open the source file, possibly decompress it.  The decoder delays
+       * this step until XD3_GOTHEADER. */
+      if (sfile->filename != NULL && (ret = main_set_source (& stream, cmd, sfile, & source)))
+	{
+	  return EXIT_FAILURE;
+	}
+    }
+
+  /*XD3_ASSERT (option_first_offset <= option_last_offset);*/
+  /*XD3_ASSERT (option_first_window <= option_last_window);*/
+
+  /*if (option_first_offset != 0 && (ret = main_file_seek (ifile, option_first_offset)))
+    {
+      return EXIT_FAILURE;
+      }*/
+
+  /* This times each window. */
+  get_millisecs_since ();
+
+  /* Main input loop. */
+  do
+    {
+      xoff_t input_offset;
+      xoff_t input_remain;
+      usize_t try_read;
+
+      input_offset = ifile->nread;
+      /*XD3_ASSERT (input_offset <= option_last_offset);*/
+
+      input_remain = /*option_last_offset*/ XOFF_T_MAX - input_offset;
+
+      try_read = (usize_t) min ((xoff_t) config.winsize, input_remain);
+
+      if ((ret = main_read_primary_input (ifile, main_bdata, try_read, & nread)))
+	{
+	  return EXIT_FAILURE;
+	}
+
+      /* If we've reached EOF tell the stream to flush. */
+      if (nread < try_read)
+	{
+	  stream_flags |= XD3_FLUSH;
+	  xd3_set_flags (& stream, stream_flags);
+	}
+
+#if XD3_ENCODER
+      /* After the first main_read_primary_input completes, we know all the information
+       * needed to encode the application header. */
+      if (cmd == CMD_ENCODE && (ret = main_set_appheader (& stream, ifile, sfile)))
+	{
+	  return EXIT_FAILURE;
+	}
+#endif
+      xd3_avail_input (& stream, main_bdata, nread);
+
+      /* If we read zero bytes after encoding at least one window... */
+      if (nread == 0 && stream.current_window > 0) {
+	break;
+      }
+
+    again:
+      ret = input_func (& stream);
+      /*if (option_verbose > 1) { XPR(NT XD3_LIB_ERRMSG (& stream, ret)); }*/
+
+      switch (ret)
+	{
+	case XD3_INPUT:
+	  continue;
+
+	case XD3_GOTHEADER:
+	  {
+	    XD3_ASSERT (stream.current_window == 0);
+
+	    /* Need to process the appheader as soon as possible.  It may contain a
+	     * suggested default filename/decompression routine for the ofile, and it may
+	     * contain default/decompression routine for the sources. */
+	    if (cmd == CMD_DECODE)
+	      {
+		int have_src = sfile->filename != NULL;
+		int need_src = xd3_decoder_needs_source (& stream);
+		int recv_src;
+
+		/* May need to set the sfile->filename if none was given. */
+		main_get_appheader (& stream, ofile, sfile);
+
+		recv_src = sfile->filename != NULL;
+
+		/* Check if the user expected a source to be required although it was not. */
+		if (have_src && ! need_src && ! option_quiet)
+		  {
+		    XPR(NT "warning: output window %"Q"u does not copy source\n", stream.current_window);
+		  }
+
+		/* Check if we have no source name and need one. */
+		/* TODO: this doesn't fire due to cpyblocks_ calculation check */
+		if (need_src && ! recv_src)
+		  {
+		    XPR(NT "input requires a source file, use -s\n");
+		    return EXIT_FAILURE;
+		  }
+
+		/* Now open the source file. */
+		if (need_src && (ret = main_set_source (& stream, cmd, sfile, & source)))
+		  {
+		    return EXIT_FAILURE;
+		  }
+	      }
+	    else if (cmd == CMD_PRINTHDR ||
+		     cmd == CMD_PRINTHDRS ||
+		     cmd == CMD_PRINTDELTA)
+	      {
+		if (xd3_decoder_needs_source (& stream) && sfile->filename == NULL)
+		  {
+		    allow_fake_source = 1; 
+		    sfile->filename = "<placeholder>";
+		    main_set_source (& stream, cmd, sfile, & source);
+		  }
+	      }
+	  }
+	/* FALLTHROUGH */
+	case XD3_WINSTART:
+	  {
+	    /* Set or unset XD3_SKIP_WINDOW. */
+	    /*if (stream.current_window < option_first_window || stream.current_window > option_last_window)
+	      { stream_flags |= XD3_SKIP_WINDOW; }
+	    else
+  	      { stream_flags &= ~XD3_SKIP_WINDOW; }*/
+
+	    xd3_set_flags (& stream, stream_flags);
+	    goto again;
+	  }
+
+	case XD3_OUTPUT:
+	  {
+	    if (option_no_output == 0/* &&
+		stream.current_window >= option_first_window &&
+		stream.current_window <= option_last_window*/)
+	      {
+		/* Defer opening the output file until the stream produces its first
+		 * output for both encoder and decoder, this way we delay long enough for
+		 * the decoder to receive the application header.  (Or longer if there are
+		 * skipped windows, but I can't think of any reason not to delay open.) */
+		
+		if (! main_file_isopen (ofile) && (ret = main_open_output (& stream, ofile)) != 0)
+		  {
+		    return EXIT_FAILURE;
+		  }
+		if ((ret = output_func (& stream, ofile)) && (ret != PRINTHDR_SPECIAL))
+		  {
+		    return EXIT_FAILURE;
+		  }
+		if (ret == PRINTHDR_SPECIAL)
+		  {
+		    xd3_abort_stream (& stream);
+		    ret = EXIT_SUCCESS;
+		    goto done;
+		  }
+		ret = 0;
+	      }
+
+	    xd3_consume_output (& stream);
+	    goto again;
+	  }
+
+	case XD3_WINFINISH:
+	  {
+	    if (IS_ENCODE (cmd) || cmd == CMD_DECODE)
+	      {
+		int used_source = xd3_encoder_used_source (& stream);
+
+		if (! option_quiet && IS_ENCODE (cmd) && main_file_isopen (sfile) && ! used_source)
+		  {
+		    XPR(NT "warning: input position %"Q"u no source copies\n",
+			stream.current_window * source.blksize);
+		  }
+
+		if (option_verbose)
+		  {
+		    char rrateavg[32], wrateavg[32], tm[32];
+		    char rdb[32],  wdb[32],  sb[32];
+		    char trdb[32], twdb[32], tsb[32];
+		    char srcbuf[48], tsrcbuf[48];
+		    long millis = get_millisecs_since ();
+		    usize_t this_read = stream.total_in - last_total_in;
+		    usize_t this_write = stream.total_out - last_total_out;
+		    last_total_in = stream.total_in;
+		    last_total_out = stream.total_out;
+
+		    tsrcbuf[0] = srcbuf[0] = 0;
+		    if (used_source)
+		      {
+			sprintf (srcbuf, ": src %s", main_format_bcnt (xd3_encoder_srclen (& stream), sb));
+			sprintf (tsrcbuf, ": src %s", main_format_bcnt (stream.srcwin_cksum_pos, tsb));
+		      }
+		    /*if (stream.current_window >= option_first_window &&
+  		          stream.current_window <= option_last_window)*/
+		      {
+			XPR(NT "%"Q"u: in %s (%s): out %s (%s)%s: total in %s: out %s%s: %s\n",
+			    stream.current_window,
+			    main_format_bcnt (this_read, rdb),
+			    main_format_rate (this_read, millis, rrateavg),
+			    main_format_bcnt (this_write, wdb),
+			    main_format_rate (this_write, millis, wrateavg),
+			    srcbuf,
+			    main_format_bcnt (stream.total_in, trdb),
+			    main_format_bcnt (stream.total_out, twdb),
+			    tsrcbuf,
+			    main_format_millis (millis, tm));
+		      }
+		  }
+	      }
+	    goto again;
+	  }
+
+	default:
+	  /* input_func() error */
+	  XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+	  return EXIT_FAILURE;
+	}
+    }
+  while (nread == config.winsize);
+done:
+  /* Close the inputs. (ifile must be open, sfile may be open) */
+  main_file_close (ifile);
+  main_file_close (sfile);
+
+  /* If output file is not open yet because of delayed-open, it means we never encountered
+   * a window in the delta, but it could have had a VCDIFF header?  TODO: solve this
+   * elsewhere.  For now, it prints "nothing to output" below, but the check doesn't
+   * happen in case of option_no_output. */
+  if (! option_no_output)
+    {
+      if (! main_file_isopen (ofile))
+	{
+	  XPR(NT "nothing to output: %s\n", ifile->filename);
+	  return EXIT_FAILURE;
+	}
+
+      /* Have to close the output before calling main_external_compression_finish, or else it hangs. */
+      if (main_file_close (ofile) != 0)
+	{
+	  return EXIT_FAILURE;
+	}
+    }
+
+  if ((ret = xd3_close_stream (& stream)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+      return EXIT_FAILURE;
+    }
+
+#if EXTERNAL_COMPRESSION
+  if ((ret = main_external_compression_finish ())) { return EXIT_FAILURE; }
+#endif
+
+  xd3_free_stream (& stream);
+
+  if (option_verbose)
+    {
+      char tm[32];
+      long end_time = get_millisecs_now ();
+      XPR(NT "command finished in %s\n", main_format_millis (end_time - start_time, tm));
+    }
+  if (option_verbose > 1)
+    {
+      XPR(NT "input bytes:  %"Q"u\n",  ifile->nread);
+      XPR(NT "output bytes: %"Q"u\n", ofile->nwrite);
+    }
+
+  return EXIT_SUCCESS;
+}
+
+/* Release the global buffers and reset the LRU counters so that main() can be re-entered. */
+static void
+main_cleanup (void)
+{
+  int blkno = 0;
+  /* NOTE(review): when -A was given, appheader_used presumably aliases the caller's
+   * string rather than a main_malloc() block, so it is dropped before the free -- confirm. */
+  if (option_appheader != NULL) { appheader_used = NULL; }
+  main_free ((void**) & appheader_used);
+  main_free ((void**) & main_bdata);
+#if EXTERNAL_COMPRESSION
+  main_free ((void**) & ext_tmpfile);
+#endif
+
+  /* Free each block held by the LRU table, then the table itself. */
+  if (lru != NULL)
+    {
+      for (; blkno < lru_size; blkno += 1) { main_free ((void**) & lru[blkno].blk); }
+    }
+  main_free ((void**) & lru);
+
+  lru_hits   = 0;
+  lru_misses = 0;
+  lru_filled = 0;
+
+  /* Leak check: the main_mallocs counter has to be back at zero here. */
+  XD3_ASSERT (main_mallocs == 0);
+}
+
+/* Command-line entry point: hand-parses the command name and flags (no getopt), opens the
+ * input file, records the output and source names, runs the selected command and then
+ * main_cleanup ().  With -P the whole sequence restarts at "go" while the command keeps
+ * succeeding.  Returns the exit status of the last run. */
+int
+#if PYTHON_MODULE
+xd3_main_cmdline (int argc, char **argv)
+#else
+main (int argc, char **argv)
+#endif
+{
+  xd3_cmd cmd;
+  main_file ifile;
+  main_file ofile;
+  main_file sfile;
+  static char *flags = "0123456789cdefhnqvDJNRTVs:B:C:E:F:L:O:P:W:A::S::";
+  int my_optind;
+  char *my_optarg;
+  char *my_optstr;
+  char *sfilename;
+  int orig_argc = argc;
+  char **orig_argv = argv;
+  int ret;
+
+ go:  /* Go. */
+  cmd = CMD_NONE;
+  sfilename = NULL;
+  my_optind = 1;
+  argv = orig_argv;
+  argc = orig_argc;
+  program_name = argv[0];
+  extcomp_types[0].recomp_cmdname = program_name;
+  extcomp_types[0].decomp_cmdname = program_name;
+ takearg:
+  my_optarg = NULL;
+  my_optstr = argv[my_optind];
+  /* This doesn't use getopt() because it makes trouble for -P & python which reenter
+   * main() and thus care about freeing all memory.  I never had much trust for getopt
+   * anyway, it's too opaque.  This implements a fairly standard non-long-option getopt
+   * with support for named operations (e.g., "xdelta3 [encode|decode|printhdr...] < in >
+   * out").  I'll probably add long options at some point. See TODO. */
+  if (my_optstr)
+    {
+      if (*my_optstr == '-')    { my_optstr += 1; }
+      else if (cmd == CMD_NONE) { goto nonflag; }
+      else                      { my_optstr = NULL; }
+    }
+  while (my_optstr)
+    {
+      char *s;
+      my_optarg = NULL;
+      if ((ret = *my_optstr++) == 0) { my_optind += 1; goto takearg; }
+
+      /* Option handling: first check for one ':' following the option in flags, then
+       * check for two.  The syntax allows:
+       *
+       * 1. -Afoo                   defines optarg="foo"
+       * 2. -A foo                  defines optarg="foo"
+       * 3. -A ""                   defines optarg="" (allows optional empty-string)
+       * 4. -A [EOA or -moreargs]   error (mandatory case)
+       * 5. -A [EOA -moreargs]      defines optarg=NULL (optional case)
+       * 6. -A=foo                  defines optarg="foo"
+       * 7. -A=                     defines optarg="" (mandatory case)
+       * 8. -A=                     defines optarg=NULL (optional case)
+       *
+       * See tests in test_command_line_arguments().
+       */
+      s = strchr (flags, ret);
+      if (s && s[1] && s[1] == ':')
+	{
+	  int option = s[2] && s[2] == ':';
+
+	  /* Case 1, set optarg to the remaining characters. */
+	  my_optarg = my_optstr;
+	  my_optstr = "";
+
+	  /* Case 2-5 */
+	  if (*my_optarg == 0)
+	    {
+	      /* Condition 4-5 */
+	      int have_arg = my_optind < (argc - 1) && *argv[my_optind+1] != '-';
+
+	      if (! have_arg)
+		{
+		  if (! option)
+		  {
+		    /* Case 4 */
+		    XPR(NT "-%c: requires an argument\n", ret);
+		    ret = EXIT_FAILURE;
+		    goto cleanup;
+		  }
+		  /* Case 5. */
+		  my_optarg = NULL;
+		}
+	      else
+		{
+		  /* Case 2-3. */
+		  my_optarg = argv[++my_optind];
+		}
+	    }
+	  /* Case 6-8. */
+	  else if (*my_optarg == '=')
+	    {
+	      /* Remove the = in all cases. */
+	      my_optarg += 1;
+
+	      if (option && *my_optarg == 0)
+		{
+		  /* Case 8. */
+		  my_optarg = NULL;
+		}
+	    }
+	}
+
+      switch (ret)
+	{
+	/* case: if no '-' was found, maybe check for a command name. */
+	nonflag:
+	       if (strcmp (my_optstr, "decode") == 0) { cmd = CMD_DECODE; }
+	  else if (strcmp (my_optstr, "encode") == 0)
+	    {
+#if XD3_ENCODER
+	      cmd = CMD_ENCODE;
+#else
+	      XPR(NT "encoder support not compiled\n");
+	      goto cleanup;
+#endif
+	    }
+	  else if (strcmp (my_optstr, "config") == 0) { cmd = CMD_CONFIG; }
+#if REGRESSION_TEST
+	  else if (strcmp (my_optstr, "test") == 0) { cmd = CMD_TEST; }
+#endif
+#if VCDIFF_TOOLS
+	  else if (strcmp (my_optstr, "printhdr") == 0) { cmd = CMD_PRINTHDR; }
+	  else if (strcmp (my_optstr, "printhdrs") == 0) { cmd = CMD_PRINTHDRS; }
+	  else if (strcmp (my_optstr, "printdelta") == 0) { cmd = CMD_PRINTDELTA; }
+#endif
+
+	  /* If no option was found and still no command, let the default command be
+	   * encode.  The remaining args are treated as filenames. */
+	  if (cmd == CMD_NONE)
+	    {
+	      cmd = CMD_DEFAULT;
+	      my_optstr = NULL;
+	      break;
+	    }
+	  else
+	    {
+	      /* But if we find a command name, continue the getopt loop. */
+	      my_optind += 1;
+	      goto takearg;
+	    }
+
+	  /* gzip-like options */
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7': case '8': case '9':
+	  option_level = ret - '0';
+	  break;
+	case 'f': option_force = 1; break;
+	case 'v': option_verbose += 1; option_quiet = 0; break;
+	case 'q': option_quiet = 1; option_verbose = 0; break;
+	case 'c': option_stdout = 1; break;
+	case 'd':
+	  if (cmd == CMD_NONE) { cmd = CMD_DECODE; }
+	  else { ret = main_help (); goto exit; }
+	  break;
+	case 'e':
+#if XD3_ENCODER
+	  if (cmd == CMD_NONE) { cmd = CMD_ENCODE; }
+	  else { ret = main_help (); goto exit; }
+	  break;
+#else
+	  XPR(NT "encoder support not compiled\n");
+	  goto cleanup;
+#endif
+
+	  //case 'F': if ((ret = main_strtoxoff (my_optarg, & option_first_window, 'F'))) { goto exit; } break;
+	  //case 'L': if ((ret = main_strtoxoff (my_optarg, & option_last_window, 'L'))) { goto exit; } break;
+	  //case 'O': if ((ret = main_strtoxoff (my_optarg, & option_first_offset, 'O'))) { goto exit; } break;
+	  //case 'E': if ((ret = main_strtoxoff (my_optarg, & option_last_offset, 'E'))) { goto exit; } break;
+
+	case 'P':
+	  /* only set profile count once, since... */
+	  if (option_profile_cnt == 0)
+	    {
+	      if ((ret = main_atou(my_optarg, (usize_t*) & option_profile_cnt, 0, 'P'))) { goto exit; }
+
+	      if (option_profile_cnt <= 0)
+		{
+		  ret = EXIT_SUCCESS;
+		  goto exit;
+		}
+	    }
+	  break;
+
+	case 'n': option_use_checksum = 0; break;
+	case 'N': option_no_compress = 1; break;
+	case 'T': option_use_altcodetable = 1; break;
+	case 'C': option_smatch_config = my_optarg; break;
+	case 'J': option_no_output = 1; break;
+	case 'S': if (my_optarg == NULL) { option_use_secondary = 0; }
+	          else { option_use_secondary = 1; option_secondary = my_optarg; } break;
+	case 'A': if (my_optarg == NULL) { option_use_appheader = 0; }
+	          else { option_appheader = (uint8_t*) my_optarg; } break;
+	case 'B': if ((ret = main_atou (my_optarg, & option_srcwinsz, MIN_BUFSIZE, 'B'))) { goto exit; } break;
+	case 'W': if ((ret = main_atou (my_optarg, & option_winsize, MIN_BUFSIZE, 'W'))) { goto exit; } break;
+	case 'D':
+#if EXTERNAL_COMPRESSION == 0
+	  if (! option_quiet)
+	    {
+	      XPR(NT "warning: -D option ignored, "
+		       "external compression support was not compiled\n");
+	    }
+#else
+	  option_decompress_inputs  = 0;
+#endif
+	  break;
+	case 'R':
+#if EXTERNAL_COMPRESSION == 0
+	  if (! option_quiet)
+	    {
+	      XPR(NT "warning: -R option ignored, "
+		       "external compression support was not compiled\n");
+	    }
+#else
+	  option_recompress_outputs = 0;
+#endif
+	  break;
+	case 's':
+	  if (sfilename != NULL)
+	    {
+	      XPR(NT "specify only one source file\n");
+	      goto cleanup;
+	    }
+
+	  sfilename = my_optarg;
+	  break;
+
+	case 'V':
+	  ret = main_version (); goto exit;
+	default:
+	  ret = main_help (); goto exit;
+	}
+    }
+
+  option_source_filename = sfilename;
+
+  /* In case there were no arguments, set the default command. */
+  if (cmd == CMD_NONE) { cmd = CMD_DEFAULT; }
+
+  argc -= my_optind;
+  argv += my_optind;
+
+  /* There may be up to two more arguments. */
+  if (argc > 2)
+    {
+      XPR(NT "too many filenames: %s ...\n", argv[2]);
+      ret = EXIT_FAILURE;
+      goto cleanup;
+    }
+
+  if (option_verbose > 1)
+    {
+      /* Echo the command line.  Every append is limited to the room left in buf, so an
+       * over-long command line is truncated instead of overrunning the stack buffer. */
+      int i;
+      char buf[1024];
+      buf[0] = 0;
+      for (i = 0; i < orig_argc; i += 1)
+	{
+	  if (orig_argv[i] == NULL) { continue; }
+	  strncat (buf, orig_argv[i], sizeof (buf) - strlen (buf) - 1);
+	  strncat (buf, " ", sizeof (buf) - strlen (buf) - 1);
+	}
+      XPR(NT "command line: %s\n", buf);
+    }
+
+  main_file_init (& ifile);
+  main_file_init (& ofile);
+  main_file_init (& sfile);
+
+  ifile.flags    = RD_FIRST;
+  sfile.flags    = RD_FIRST;
+  sfile.filename = option_source_filename;
+
+  /* The infile takes the next argument, if there is one.  But if not, infile is set to
+   * stdin. */
+  if (argc > 0)
+    {
+      ifile.filename = argv[0];
+
+      if ((ret = main_file_open (& ifile, ifile.filename, XO_READ)))
+	{
+	  goto cleanup;
+	}
+    }
+  else
+    {
+      XSTDIN_XF (& ifile);
+    }
+
+  /* The ofile takes the following argument, if there is one.  But if not, it is left NULL
+   * until the application header is processed.  It will be set in main_open_output. */
+  if (argc > 1)
+    {
+      /* Check for conflicting arguments. */
+      if (option_stdout && ! option_quiet)
+	{
+	  XPR(NT "warning: -c option overrides output filename: %s\n", argv[1]);
+	}
+
+      if (! option_stdout) { ofile.filename = argv[1]; }
+    }
+
+  switch (cmd)
+    {
+    case CMD_PRINTHDR:
+    case CMD_PRINTHDRS:
+    case CMD_PRINTDELTA:
+#if XD3_ENCODER
+    case CMD_ENCODE:
+      if (cmd == CMD_ENCODE)
+	{
+	  do_not_lru = 1;
+	}
+#endif
+    case CMD_DECODE:
+      ret = main_input (cmd, & ifile, & ofile, & sfile);
+      break;
+
+#if REGRESSION_TEST
+    case CMD_TEST:
+      ret = xd3_selftest ();
+      break;
+#endif
+
+    case CMD_CONFIG:
+      ret = main_config ();
+      break;
+
+    default:
+      ret = main_help ();
+      break;
+    }
+
+#if EXTERNAL_COMPRESSION
+  if (ext_tmpfile != NULL) { unlink (ext_tmpfile); }
+#endif
+  /* Entered only through the labels: "cleanup" forces a failure status, "exit" keeps ret. */
+  if (0)
+    {
+    cleanup:
+      ret = EXIT_FAILURE;
+    exit:
+      (void)0;
+    }
+
+  main_cleanup ();
+  /* -P: run the whole command again while the repeat count lasts and it keeps succeeding. */
+  if (--option_profile_cnt > 0 && ret == EXIT_SUCCESS) { goto go; }
+
+  return ret;
+}
+
+/* Print the version banner followed by the usage summary.  Always returns EXIT_FAILURE;
+ * main() uses that value as its exit status whenever the help text is shown. */
+static int
+main_help (void)
+{
+  /* Options not listed below, yet: 0-9, J, T, C.  Keep www/xdelta3-cmdline.html in sync. */
+
+  main_version ();
+  P(RINT "usage: xdelta3 [command/options] [input [output]]\n");
+  P(RINT "commands are:\n");
+  P(RINT "    encode      encodes the input%s\n", XD3_ENCODER ? "" : " [Not compiled]");
+  P(RINT "    decode      decodes the input\n");
+  P(RINT "    config      prints xdelta3 configuration\n");
+#if REGRESSION_TEST
+  P(RINT "    test        run the builtin tests\n");
+#endif
+#if VCDIFF_TOOLS
+  P(RINT "special commands for VCDIFF inputs:\n");
+  P(RINT "    printhdr    print information about the first window\n");
+  P(RINT "    printhdrs   print information about all windows\n");
+  P(RINT "    printdelta  print information about the entire delta\n");
+#endif
+  P(RINT "options are:\n");
+  P(RINT "   -c           use stdout instead of default\n");
+  P(RINT "   -d           same as decode command\n");
+  P(RINT "   -e           same as encode command\n");
+  P(RINT "   -f           force overwrite\n");
+  P(RINT "   -n           disable checksum (encode/decode)\n");
+  P(RINT "   -D           disable external decompression (encode/decode)\n");
+  P(RINT "   -R           disable external recompression (decode)\n");
+  P(RINT "   -N           disable small string-matching compression\n");
+  P(RINT "   -S [djw|fgk] disable/enable secondary compression\n");
+  P(RINT "   -A [apphead] disable/provide application header\n");
+  P(RINT "   -s source    source file to copy from (if any)\n");
+  P(RINT "   -B srcwinsz  source window buffer size\n");
+  P(RINT "   -W winsize   input window buffer size\n");
+  P(RINT "   -v           be verbose (max 2)\n");
+  P(RINT "   -q           be quiet\n");
+  P(RINT "   -h           show help\n");
+  P(RINT "   -V           show version\n");
+  P(RINT "   -P           repeat count (for profiling)\n");
+
+  return EXIT_FAILURE;
+}
diff --git a/xdelta3/xdelta3-python.h b/xdelta3/xdelta3-python.h
new file mode 100755
index 0000000..cfd6095
--- /dev/null
+++ b/xdelta3/xdelta3-python.h
@@ -0,0 +1,86 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2003 and onward.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "Python.h"
+
+static PyObject *pyxd3_error;
+
+/* xdelta3.main(args): run the xdelta3 command line on a list of argument strings (the
+ * program name is supplied here).  Returns 0 on success and raises xdelta3.error when the
+ * command fails; a non-list argument or a non-string list item raises TypeError. */
+PyObject *xdelta3_main_cmdline (PyObject *self, PyObject *args)
+{
+  int ret, i, nargs;
+  char **argv = NULL;
+  int argc = 0;
+  PyObject *result = NULL;
+  PyObject *o;
+
+  /* "O!" makes the parser itself raise TypeError unless the argument is a list. */
+  if (! PyArg_ParseTuple (args, "O!", & PyList_Type, & o))
+    {
+      goto cleanup;
+    }
+
+  argc  = PyList_Size (o);
+  nargs = argc + 2;   /* program name + arguments + terminating NULL */
+
+  if (! (argv = malloc (sizeof(argv[0]) * nargs)))
+    {
+      PyErr_NoMemory ();
+      goto cleanup;
+    }
+  memset (argv, 0, sizeof(argv[0]) * nargs);
+  argv[0] = "xdelta3";   /* xd3_main_cmdline reads argv[0] as the program name */
+
+  for (i = 1; i < nargs-1; i += 1)
+    {
+      PyObject *s;
+      if ((s = PyList_GetItem (o, i-1)) == NULL) { goto cleanup; }
+      /* Pointer into the string object itself; NULL (exception set) for a non-string. */
+      if ((argv[i] = PyString_AsString (s)) == NULL) { goto cleanup; }
+    }
+
+  /* Only a zero status becomes a return value; anything else raises xdelta3.error. */
+  ret = xd3_main_cmdline (argc+1, argv);
+
+  if (ret == 0)
+    {
+      result = Py_BuildValue ("i", ret);
+    }
+  else
+    {
+      PyErr_SetString (pyxd3_error, "failed :(");
+    }
+ cleanup:
+  if (argv) { free (argv); }
+  return result;
+}
+static PyMethodDef xdelta3_methods[] = {   /* method table passed to Py_InitModule */
+  { "main", xdelta3_main_cmdline, METH_VARARGS, "xdelta3 main()" },
+  { NULL, NULL }   /* end-of-table sentinel */
+};
+
+DL_EXPORT(void) initxdelta3 (void)
+{
+  PyObject *m, *d;
+  m = Py_InitModule ("xdelta3", xdelta3_methods);
+  d = PyModule_GetDict (m);
+  pyxd3_error = PyErr_NewException ("xdelta3.error", NULL, NULL);
+  PyDict_SetItemString (d, "error", pyxd3_error);
+}
diff --git a/xdelta3/xdelta3-regtest.py b/xdelta3/xdelta3-regtest.py
new file mode 100755
index 0000000..f3313a4
--- /dev/null
+++ b/xdelta3/xdelta3-regtest.py
@@ -0,0 +1,596 @@
+#!/usr/bin/python2.3
+# xdelta 3 - delta compression tools and library
+# Copyright (C) 2003 and onward.  Joshua P. MacDonald
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# Under construction.
+
+# TODO: This is really part test, part performance evaluation suite, and
+# really incomplete.
+
+import os, sys, math, re, time, types, array
+import xdelta3
+
+HIST_SIZE      = 10   # the number of buckets
+MIN_SIZE       = 0    # RcsFile.PairsByDate skips a pair when either file is smaller (bytes)
+
+TIME_TOO_SHORT = 0.050  # TimeRun adds repetitions while a run's wall time is below this
+
+MIN_REPS       = 1    # TimeRun defaults for reps / max_reps
+MAX_REPS       = 1
+SKIP_TRIALS    = 1    # NOTE: these three values are overridden by the assignments below
+MIN_TRIALS     = 3
+MAX_TRIALS     = 15
+
+SKIP_TRIALS    = 0    # effective TimeRun defaults for skip_trials / min_trials / max_trials
+MIN_TRIALS     = 1
+MAX_TRIALS     = 1
+
+MIN_STDDEV_PCT = 1.5 # stop
+MAX_RUN        = 1000 * 1000 * 10
+
+XD3CMD = './xdelta3-64'
+#XD3CMD = './xdelta3'
+
+# kind:
+PYEXT = 1
+FORK  = 0
+
+# RCSDIR is a machine-specific absolute path to a tree of RCS ",v" files; TMPDIR is a
+# per-process scratch directory (its name embeds the pid).
+RCSDIR = '/Volumes/LACIE120RAID/orbit_linux/home/jmacd/PRCS/prcs/b'
+
+TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()
+
+RUNFILE = os.path.join(TMPDIR, 'run')
+HFILE   = os.path.join(TMPDIR, 'hdr')
+DFILE   = os.path.join(TMPDIR, 'output')  # DFILE and RFILE are removed before each TimeRun trial
+RFILE   = os.path.join(TMPDIR, 'recon')
+
+HEAD_STATE = 0   # RcsFile parser states: waiting for RE_TOTREV
+BAR_STATE  = 1   # waiting for RE_BAR
+REV_STATE  = 2   # waiting for RE_REV
+DATE_STATE = 3   # waiting for RE_DATE, after which the parser returns to BAR_STATE
+
+# rcs output
+RE_TOTREV  = re.compile('total revisions: (\\d+)')
+RE_BAR     = re.compile('----------------------------')
+RE_REV     = re.compile('revision (.+)')
+RE_DATE    = re.compile('date: ([^;]+);.*')
+# xdelta output
+RE_HDRSZ   = re.compile('VCDIFF header size: +(\\d+)')
+RE_EXTCOMP = re.compile('XDELTA ext comp.*')
+
+#
+# exceptions
+class SkipRcsException:
+    def __init__(self,reason):
+        self.reason = reason
+class NotEnoughVersions:
+    def __init__(self):
+        pass
+class CommandError:
+    def __init__(self,cmd,str):
+        if type(cmd) is types.TupleType or \
+           type(cmd) is types.ListType:
+            cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd)
+        print 'command was: ',cmd
+        print 'command failed: ',str
+        print 'have fun debugging'
+#
+# one version
+class RcsVersion:
+    def __init__(self,vstr):
+        self.vstr = vstr
+    def __cmp__(self,other):
+        return cmp(self.date, other.date)
+    def Print(self):
+        print '%s %s' % (self.vstr, self.date)
+#
+# one rcsfile
+class RcsFile:
+
+    def __init__(self, fname):
+        self.fname    = fname
+        self.versions = []
+        self.state    = HEAD_STATE
+
+    def SetTotRev(self,s):
+        self.totrev = int(s)
+
+    def Rev(self,s):
+        self.rev = RcsVersion(s)
+        if len(self.versions) >= self.totrev:
+            raise SkipRcsException('too many versions (in log messages)')
+        self.versions.append(self.rev)
+
+    def Date(self,s):
+        self.rev.date = s
+
+    def Match(self, line, state, rx, gp, newstate, f):
+        if state == self.state:
+            m = rx.match(line)
+            if m:
+                if f:
+                    f(m.group(gp))
+                self.state = newstate
+                return 1
+        return None
+
+    def Sum1Rlog(self):
+        f = os.popen('rlog '+self.fname, "r")
+        l = f.readline()
+        while l:
+            if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev):
+                pass
+            elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None):
+                pass
+            elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev):
+                pass
+            elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date):
+                pass
+            l = f.readline()
+        c = f.close()
+        if c != None:
+            raise c
+        #print '%s versions %d' % (self.fname, len(self.versions))
+        #for v in self.versions:
+        #    v.Print()
+
+    def Sum1(self):
+        st = os.stat(self.fname)
+        self.rcssize = st.st_size
+        self.Sum1Rlog()
+        if self.totrev != len(self.versions):
+            raise SkipRcsException('wrong version count')
+        self.versions.sort()
+
+    def Checkout(self,n):
+        v      = self.versions[n]
+        out    = open(self.Verf(n), "w")
+        cmd    = 'co -ko -p%s %s' % (v.vstr, self.fname)
+        total  = 0
+        (inf,
+         stream,
+         err)  = os.popen3(cmd, "r")
+        inf.close()
+        buf    = stream.read()
+        while buf:
+            total = total + len(buf)
+            out.write(buf)
+            buf = stream.read()
+        v.vsize = total
+        estr = ''
+        buf = err.read()
+        while buf:
+            estr = estr + buf
+            buf = err.read()
+        if stream.close():
+            raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))
+        out.close()
+        err.close()
+
+    def Vdate(self,n):
+        return self.versions[n].date
+
+    def Vstr(self,n):
+        return self.versions[n].vstr
+
+    def Verf(self,n):
+        return os.path.join(TMPDIR, 'input.%d' % n)
+
+    def PairsByDate(self,runnable):
+        if self.totrev < 2:
+            raise NotEnoughVersions()
+        self.Checkout(0)
+        ntrials = []
+        if self.totrev < 2:
+            return vtrials
+        for v in range(0,self.totrev-1):
+            if v > 1:
+                os.remove(self.Verf(v-1))
+            self.Checkout(v+1)
+            if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
+               os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
+                continue
+            
+            result = TimeRun(runnable.Runner(self.Verf(v),
+                                             self.Vstr(v),
+                                             self.Verf(v+1),
+                                             self.Vstr(v+1)))
+            print 'testing %s %s: ideal %.3f%%: time %.7f: in %u/%u trials' % \
+                  (os.path.basename(self.fname),
+                   self.Vstr(v+1),
+                   result.r1.ideal,
+                   result.time.mean,
+                   result.trials,
+                   result.reps)
+            ntrials.append(result)
+            
+        os.remove(self.Verf(self.totrev-1))
+        os.remove(self.Verf(self.totrev-2))
+        return ntrials
+#
+# This class recursively scans a directory for rcsfiles
+class RcsFinder:
+    def __init__(self):
+        self.subdirs  = []
+        self.rcsfiles = []
+        self.others   = []
+        self.skipped  = []
+
+    def Scan1(self,dir):
+        dents = os.listdir(dir)
+        subdirs  = []
+        rcsfiles = []
+        others   = []
+        for dent in dents:
+            full = os.path.join(dir, dent)
+            if os.path.isdir(full):
+                subdirs.append(full)
+            elif dent[len(dent)-2:] == ",v":
+                rcsfiles.append(RcsFile(full))
+            else:
+                others.append(full)
+        self.subdirs  = self.subdirs  + subdirs
+        self.rcsfiles = self.rcsfiles + rcsfiles
+        self.others   = self.others   + others
+        return subdirs
+
+    def Crawl(self, dir):
+        subdirs = [dir]
+        while subdirs:
+            s1 = self.Scan1(subdirs[0])
+            subdirs = subdirs[1:] + s1
+
+    def Summarize(self):
+        good = []
+        for rf in self.rcsfiles:
+            try:
+                rf.Sum1()
+                if rf.totrev < 2:
+                    raise SkipRcsException('too few versions (< 2)')
+            except SkipRcsException, e:
+                #print 'skipping file %s: %s' % (rf.fname, e.reason)
+                self.skipped.append(rf)
+            else:
+                good.append(rf)
+        self.rcsfiles = good
+
+    def PairsByDate(self,runnable):
+        allvtrials = []
+        good = []
+        for rf in self.rcsfiles:
+            print 'testing %s on %s with %d versions' % (runnable.type, rf.fname, rf.totrev)
+            try:
+                allvtrials.append(rf.PairsByDate(runnable))
+            except SkipRcsException:
+                print 'file %s has compressed versions: skipping' % (rf.fname)
+            except NotEnoughVersions:
+                print 'testing %s on %s: not enough versions' % (runnable.type, rf.fname)
+            else:
+                good.append(rf)
+        self.rcsfiles = good
+        return allvtrials
+#
+#
+class Bucks:
+    def __init__(self,low,high):
+        self.low    = low
+        self.high   = high
+        self.spread = high - low
+        self.bucks  = []
+        for i in range(0,HIST_SIZE):
+            self.bucks.append([low+(self.spread * (i+0.0) / float(HIST_SIZE)),
+                               low+(self.spread * (i+0.5) / float(HIST_SIZE)),
+                               low+(self.spread * (i+1.0) / float(HIST_SIZE)),
+                               0])
+    def Add(self, x):
+        assert(x>=self.low)
+        assert(x<self.high)
+        t = self.bucks[int((x-self.low)/float(self.spread)*HIST_SIZE)]
+        t[3] = t[3] + 1
+    def Print(self, f):
+        for i in self.bucks:
+            # gnuplot -persist "plot %s using 2:4
+            f.write("%.1f %.1f %.1f %d\n" % (i[0],i[1],i[2],i[3]))
+#
+#
+class TimeRun:
+    def __init__(self,runnable,set_reps=1,reps=MIN_REPS,max_reps=MAX_REPS,\
+                 skip_trials=SKIP_TRIALS,min_trials=MIN_TRIALS,max_trials=MAX_TRIALS, \
+                 min_stddev_pct=MIN_STDDEV_PCT):
+
+        min_trials = min(min_trials,max_trials)
+        self.trials   = 0
+        self.measured = []
+        self.r1       = None
+        self.reps     = reps
+        while 1:
+            try:
+                os.remove(DFILE)
+                os.remove(RFILE)
+            except OSError:
+                pass
+
+            start_time  = time.time()
+            start_clock = time.clock()
+
+            result = runnable.Run(self.trials, self.reps)
+
+            if self.r1 == None:
+                self.r1 = result
+
+            total_clock = (time.clock() - start_clock)
+            total_time  = (time.time()  - start_time)
+
+            elap_time  = max((total_time) / self.reps,  0.000001)
+            elap_clock = max((total_clock) / self.reps, 0.000001)
+
+            #print 'trial: %d' % self.trials
+            if set_reps and runnable.canrep and total_time < TIME_TOO_SHORT and self.reps < max_reps:
+                self.reps = max(self.reps+1,int(self.reps * TIME_TOO_SHORT / total_time))
+                self.reps = min(self.reps,max_reps)
+                #print 'continue: need more reps: %d' % self.reps
+                continue
+
+            self.trials = self.trials + 1
+
+            # skip some of the first trials
+            if self.trials > skip_trials:
+                self.measured.append((elap_clock,elap_time))
+                #print 'measurement total: %.1f ms' % (total_time * 1000.0)
+
+            # at least so many
+            if self.trials < (skip_trials + min_trials):
+                #print 'continue: need more trials: %d' % self.trials
+                continue
+
+            # compute %variance
+            done = 0
+            if skip_trials + min_trials <= 2:
+                done = 1
+                self.measured = self.measured + self.measured;
+
+            self.time = StatList([x[1] for x in self.measured], 'elap time')
+            sp = float(self.time.s) / float(self.time.mean)
+
+            # what if MAX_TRIALS is exceeded?
+            too_many = (self.trials-skip_trials) >= max_trials
+            good     = (100.0 * sp) < min_stddev_pct
+            if done or too_many or good:
+                self.trials = self.trials - skip_trials
+                if not done and not good:
+                    #print 'too many trials: %d' % self.trials
+                    pass
+                self.clock  = StatList([x[0] for x in self.measured], 'elap clock')
+                return
+#
+#
+#
+def SumList(l):
+    return reduce(lambda x,y: x+y, l)
+#
+# computes total, mean, s (stddev), q2 (median), siqr = (q3-q1)/2
+#          ("semi-interquartile range") and spread = q4-q0, where q4 is max+1
+class StatList:
+    def __init__(self,l,desc,hist=0):
+        cnt = len(l)
+        assert(cnt > 1)
+        l.sort()
+        self.cnt    = cnt
+        self.l      = l
+        self.total  = SumList(l)
+        self.mean   = self.total / float(self.cnt)
+        self.s      = math.sqrt(SumList([(x-self.mean) * (x - self.mean) for x in l]) / float(self.cnt-1))
+        self.q0     = l[0]
+        self.q1     = l[int(self.cnt/4.0+0.5)]
+        self.q2     = l[int(self.cnt/2.0+0.5)]
+        self.q3     = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))]
+        self.q4     = l[self.cnt-1]+1
+        self.hf     = "./%s.hist" % desc
+        self.siqr   = (self.q3-self.q1)/2.0;
+        self.spread = (self.q4-self.q0)
+        self.str    = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % \
+                      (desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread)
+        if hist:
+            f = open(self.hf, "w")
+            self.bucks = Bucks(self.q0,self.q4)
+            for i in l:
+                self.bucks.Add(i)
+            self.bucks.Print(f)
+            f.close()
+
+def RunCommand(args):
+    #print "run command", args
+    p = os.spawnvp(os.P_WAIT, args[0], args)
+    if p != 0: 
+        raise CommandError(args, 'exited %d' % p)
+
+def RunCommandIO(args,infn,outfn):
+    #print "run command io", args
+    p = os.fork()
+    if p == 0:
+        os.dup2(os.open(infn,os.O_RDONLY),0)
+        os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
+        os.execvp(args[0], args)
+    else:
+        s = os.waitpid(p,0)
+        o = os.WEXITSTATUS(s[1])
+        if not os.WIFEXITED(s[1]) or o != 0:
+            raise CommandError(args, 'exited %d' % o)
+
+def RunXdelta3(args,kind=FORK):
+    if 0: # kind == FORK:
+        RunCommand([XD3CMD] + args)
+    else:
+        try:
+            xdelta3.main(args)
+        except Exception, e:
+            raise CommandError(args, "xdelta3.main exception")
+
+class GzipInfo:
+    def __init__(self,target,delta):
+        self.tgtsize = os.stat(target).st_size
+        self.dsize   = os.stat(delta).st_size
+        
+class Xdelta3Info:
+    def __init__(self,target,delta):
+        RunXdelta3(['printhdr',
+                    '-f',
+                    delta,
+                    HFILE])
+        o = open(HFILE, "r")
+        l = o.readline()
+        self.extcomp = 0
+        self.hdrsize = 0
+        self.tgtsize = os.stat(target).st_size
+        self.dsize   = os.stat(delta).st_size
+        if self.tgtsize > 0:
+            self.ideal = 100.0 * self.dsize / self.tgtsize;
+        else:
+            self.ideal = 0.0
+        while l:
+            #print l.strip()
+            m = RE_HDRSZ.match(l)
+            if m:
+                self.hdrsize = int(m.group(1))
+            m = RE_EXTCOMP.match(l)
+            if m:
+                #print 'EXTCOMP', m.group(0)
+                self.extcomp = 1
+            l = o.readline()
+        if self.hdrsize == 0:
+            raise CommandError(cmd, 'no hdrsize')
+        o.close()
+
+class Xdelta3Pair:
+    def __init__(self):
+        self.type        = 'xdelta3'
+        self.decode_args = '-dqf'
+        self.encode_args = '-eqf'
+        self.presrc      = '-s'
+        self.canrep      = 1
+
+    def Runner(self,old,oldv,new,newv):
+        self.old = old
+        self.oldv = oldv
+        self.new = new
+        self.newv = newv        
+        return self
+
+    def Run(self,trial,reps):
+        RunXdelta3(['-P',
+                    '%d' % reps,
+                    self.encode_args,
+                    self.presrc,
+                    self.old,
+                    self.new,
+                    DFILE])
+        if trial > 0:
+            return None
+        self.dinfo = Xdelta3Info(self.new,DFILE)
+        if self.dinfo.extcomp:
+            raise SkipRcsException('ext comp')
+        RunXdelta3([self.decode_args,
+                    self.presrc,
+                    self.old,
+                    DFILE,
+                    RFILE])
+        RunCommand(('cmp',
+                    self.new,
+                    RFILE))
+        return self.dinfo
+
+def Test():
+    rcsf = RcsFinder()
+    rcsf.Crawl(RCSDIR)
+    if len(rcsf.rcsfiles) == 0:
+        sys.exit(1)
+    rcsf.Summarize()
+    print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (len(rcsf.rcsfiles),
+                                                                        len(rcsf.subdirs),
+                                                                        len(rcsf.others),
+                                                                        len(rcsf.skipped))
+    print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize", 1).str
+    print StatList([x.totrev for x in rcsf.rcsfiles], "totrev", 1).str
+    pairs = rcsf.PairsByDate(Xdelta3Pair())
+
+def Decimals(max):
+    l = [0]
+    step = 1
+    while 1:
+        r = range(step, step * 10, step)
+        l = l + r
+        if step * 10 >= max:
+            l.append(step * 10)
+            break
+        step = step * 10
+    return l
+
+class Xdelta3Run1:
+    def __init__(self,file,kind,reps=0):
+        self.file = file
+        self.reps = reps
+        self.canrep = 1
+        self.kind   = kind
+    def Run(self,trial,reps):
+        if self.reps:
+            assert(reps == 1)
+            reps = self.reps
+        RunXdelta3(['-P', '%d' % reps, '-efq', self.file, DFILE],kind=self.kind)
+        if trial > 0:
+            return None
+        return Xdelta3Info(self.file,DFILE)
+
+class GzipRun1:
+    def __init__(self,file):
+        self.file = file
+        self.canrep = 0
+    def Run(self,trial,reps):
+        assert(reps == 1)
+        RunCommandIO(['gzip', '-cf'], self.file, DFILE)
+        if trial > 0:
+            return None
+        return GzipInfo(self.file,DFILE)
+
+def SetFileSize(F,L):
+    fd = os.open(F, os.O_CREAT | os.O_WRONLY)
+    os.ftruncate(fd,L)
+    assert(os.fstat(fd).st_size == L)
+    os.close(fd)
+
+def ReportSpeed(L,tr,desc):
+    print '%s 0-run length %u: dsize %u: time %.3f ms: encode %.0f B/sec: in %ux%u trials' % \
+          (desc, L, tr.r1.dsize, tr.time.mean * 1000.0, ((L+tr.r1.dsize) / tr.time.mean), tr.trials, tr.reps)
+
+def RunSpeed():
+    for L in Decimals(MAX_RUN):
+        SetFileSize(RUNFILE, L)
+        trx = TimeRun(Xdelta3Run1(RUNFILE,kind=PYEXT))
+        ReportSpeed(L,trx,'xdelta3')
+        trg = TimeRun(GzipRun1(RUNFILE))
+        ReportSpeed(L,trg,'gzip   ')
+
+if __name__ == "__main__":
+    try:
+        os.mkdir(TMPDIR)
+        Test()
+        RunSpeed()
+    except CommandError:
+        pass
+    else:
+        RunCommand(['rm', '-rf', TMPDIR])
diff --git a/xdelta3/xdelta3-second.h b/xdelta3/xdelta3-second.h
new file mode 100755
index 0000000..89287f0
--- /dev/null
+++ b/xdelta3/xdelta3-second.h
@@ -0,0 +1,363 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2002 and onward.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _XDELTA3_SECOND_H_
+#define _XDELTA3_SECOND_H_
+
+/******************************************************************************************
+ Secondary compression
+ ******************************************************************************************/
+
+#define xd3_sec_data(s) ((s)->sec_stream_d)
+#define xd3_sec_inst(s) ((s)->sec_stream_i)
+#define xd3_sec_addr(s) ((s)->sec_stream_a)
+
+struct _xd3_sec_type
+{
+  int         id;
+  const char *name;
+  xd3_secondary_flags flags;
+
+  /* xd3_sec_stream is opaque to the generic code */
+  xd3_sec_stream* (*alloc)   (xd3_stream     *stream);
+  void            (*destroy) (xd3_stream     *stream,
+			      xd3_sec_stream *sec);
+  void            (*init)    (xd3_sec_stream *sec);
+  int             (*decode)  (xd3_stream     *stream,
+			      xd3_sec_stream *sec_stream,
+			      const uint8_t **input,
+			      const uint8_t  *input_end,
+			      uint8_t       **output,
+			      const uint8_t  *output_end);
+#if XD3_ENCODER
+  int             (*encode)  (xd3_stream     *stream,
+			      xd3_sec_stream *sec_stream,
+			      xd3_output     *input,
+			      xd3_output     *output,
+			      xd3_sec_cfg    *cfg);
+#endif
+};
+
+#define BIT_STATE_ENCODE_INIT { 0, 1 }
+#define BIT_STATE_DECODE_INIT { 0, 0x100 }
+
+typedef struct _bit_state bit_state;
+struct _bit_state
+{
+  usize_t cur_byte;
+  usize_t cur_mask;
+};
+
+static INLINE void xd3_bit_state_encode_init  (bit_state       *bits)
+{
+  bits->cur_byte = 0;
+  bits->cur_mask = 1;
+}
+
+static INLINE int xd3_decode_bits     (xd3_stream     *stream,
+				       bit_state      *bits,
+				       const uint8_t **input,
+				       const uint8_t  *input_max,
+				       usize_t          nbits,
+				       usize_t         *valuep)
+{
+  usize_t value = 0;
+  usize_t vmask = 1 << nbits;
+  /* Read NBITS bits into *VALUEP; the first bit read becomes the value's highest bit. */
+  if (bits->cur_mask == 0x100) { goto next_byte; }
+  /* cur_mask == 0x100 means every bit of cur_byte has been consumed (masks run 1..0x80). */
+  for (;;)
+    {
+      do
+	{
+	  vmask >>= 1;
+
+	  if (bits->cur_byte & bits->cur_mask)
+	    {
+	      value |= vmask;
+	    }
+
+	  IF_DEBUG1 (P(RINT "[dbits] %u", (bits->cur_byte & bits->cur_mask) && 1));
+
+	  bits->cur_mask <<= 1;
+	  /* NOTE(review): with NBITS == 0 vmask is 0 by now, so this exit test never fires. */
+	  if (vmask == 1) { goto done; }
+	}
+      while (bits->cur_mask != 0x100);
+
+    next_byte:
+      /* Load the next input byte, or fail with EINVAL if the input is exhausted. */
+      if (*input == input_max)
+	{
+	  stream->msg = "secondary decoder end of input";
+	  return EINVAL;
+	}
+
+      bits->cur_byte = *(*input)++;
+      bits->cur_mask = 1;
+    }
+
+ done:
+
+  (*valuep) = value;
+  return 0;
+}
+
+static INLINE int xd3_decode_bit     (xd3_stream     *stream,
+				      bit_state      *bits,
+				      const uint8_t **input,
+				      const uint8_t  *input_max,
+				      usize_t         *valuep)
+{
+  if (bits->cur_mask == 0x100)
+    {
+      if (*input == input_max)
+	{
+	  stream->msg = "secondary decoder end of input";
+	  return EINVAL;
+	}
+
+      bits->cur_byte = *(*input)++;
+      bits->cur_mask = 1;
+    }
+
+  *valuep = (bits->cur_byte & bits->cur_mask) && 1;
+
+  IF_DEBUG1 (P(RINT "[dbit] %u", (bits->cur_byte & bits->cur_mask) && 1));
+
+  bits->cur_mask <<= 1;
+
+  return 0;
+}
+
+#if REGRESSION_TEST
+/* There may be extra bits at the end of secondary decompression, this function checks for
+ * non-zero bits.  This is overly strict, but helps pass the single-bit-error regression
+ * test. */
+static int
+xd3_test_clean_bits (xd3_stream *stream, bit_state *bits)
+{
+  for (; bits->cur_mask != 0x100; bits->cur_mask <<= 1)
+    {
+      if (bits->cur_byte & bits->cur_mask)
+	{
+	  stream->msg = "secondary decoder garbage";
+	  return EINVAL;
+	}
+    }
+
+  return 0;
+}
+#endif
+
+static xd3_sec_stream*
+xd3_get_secondary (xd3_stream *stream, xd3_sec_stream **sec_streamp)
+{
+  xd3_sec_stream *sec_stream;
+  /* Return *SEC_STREAMP, allocating and initializing it on first use; NULL if alloc fails. */
+  if ((sec_stream = *sec_streamp) == NULL)
+    {
+      if ((*sec_streamp = stream->sec_type->alloc (stream)) == NULL)
+	{
+	  return NULL;
+	}
+
+      sec_stream = *sec_streamp;
+
+      /* If cumulative stats, init once. */
+      stream->sec_type->init (sec_stream);
+    }
+
+  return sec_stream;
+}
+
+static int
+xd3_decode_secondary (xd3_stream      *stream,
+		      xd3_desect      *sect,
+		      xd3_sec_stream **sec_streamp)
+{
+  xd3_sec_stream *sec_stream;
+  uint32_t dec_size;
+  uint8_t *out_used;
+  int ret;
+
+  if ((sec_stream = xd3_get_secondary (stream, sec_streamp)) == NULL) { return ENOMEM; }
+
+  /* Decode the size, allocate the buffer. */
+  if ((ret = xd3_read_size (stream, & sect->buf, sect->buf_max, & dec_size)) ||
+      (ret = xd3_decode_allocate (stream, dec_size, & sect->copied2, & sect->alloc2, NULL, NULL)))
+    {
+      return ret;
+    }
+
+  out_used = sect->copied2;
+
+  if ((ret = stream->sec_type->decode (stream, sec_stream,
+				       & sect->buf, sect->buf_max,
+				       & out_used, out_used + dec_size))) { return ret; }
+
+  if (sect->buf != sect->buf_max)
+    {
+      stream->msg = "secondary decoder finished with unused input";
+      return EINVAL;
+    }
+
+  if (out_used != sect->copied2 + dec_size)
+    {
+      stream->msg = "secondary decoder short output";
+      return EINVAL;
+    }
+
+  sect->buf     = sect->copied2;
+  sect->buf_max = sect->copied2 + dec_size;
+
+  return 0;
+}
+
+#if XD3_ENCODER
+/* OPT: Should these be inline? */
+static INLINE int xd3_encode_bit       (xd3_stream      *stream,
+					xd3_output     **output,
+					bit_state       *bits,
+					int              bit)
+{
+  int ret;
+
+  if (bit)
+    {
+      bits->cur_byte |= bits->cur_mask;
+    }
+
+  IF_DEBUG1 (P(RINT "[ebit] %u", bit && 1));
+
+  /* OPT: Might help to buffer more than 8 bits at once. */
+  if (bits->cur_mask == 0x80)
+    {
+      if ((ret = xd3_emit_byte (stream, output, bits->cur_byte)) != 0) { return ret; }
+
+      bits->cur_mask = 1;
+      bits->cur_byte = 0;
+    }
+  else
+    {
+      bits->cur_mask <<= 1;
+    }
+
+  return 0;
+}
+
+static INLINE int xd3_flush_bits       (xd3_stream      *stream,
+					xd3_output     **output,
+					bit_state       *bits)
+{
+  return (bits->cur_mask == 1) ? 0 : xd3_emit_byte (stream, output, bits->cur_byte);
+}
+
+static INLINE int xd3_encode_bits      (xd3_stream      *stream,
+					xd3_output     **output,
+					bit_state       *bits,
+					usize_t           nbits,
+					usize_t           value)
+{
+  int ret;
+  usize_t mask = 1 << nbits;
+
+  XD3_ASSERT (nbits > 0);
+  XD3_ASSERT (nbits < sizeof (usize_t) * 8);
+  XD3_ASSERT (value < mask);
+
+  do
+    {
+      mask >>= 1;
+
+      if ((ret = xd3_encode_bit (stream, output, bits, value & mask))) { return ret; }
+    }
+  while (mask != 1);
+
+  return 0;
+}
+
+static int
+xd3_encode_secondary (xd3_stream      *stream,
+		      xd3_output     **head,
+		      xd3_output     **tail,
+		      xd3_sec_stream **sec_streamp,
+		      xd3_sec_cfg     *cfg,
+		      int             *did_it)
+{
+  /* Compress the section at *HEAD.  If that saves more than SECONDARY_MIN_SAVINGS bytes,
+   * the result replaces *HEAD and *TAIL and *DID_IT is set.  XD3_NOSECOND maps to 0. */
+  xd3_sec_stream *sec_stream;
+  xd3_output     *tmp_head;
+  xd3_output     *tmp_tail;
+  usize_t comp_size;
+  usize_t orig_size;
+  int ret;
+
+  orig_size = xd3_sizeof_output (*head);
+
+  if (orig_size < SECONDARY_MIN_INPUT) { return 0; }
+
+  if ((sec_stream = xd3_get_secondary (stream, sec_streamp)) == NULL) { return ENOMEM; }
+
+  if ((tmp_head = xd3_alloc_output (stream, NULL)) == NULL) { return ENOMEM; }
+
+  /* Encode the size, encode the data.  @@ Encoding the size makes it simpler, but is a
+   * little gross.  Should not need the entire section in contiguous memory, but it is
+   * much easier this way. */
+  if ((ret = xd3_emit_size (stream, & tmp_head, orig_size)) ||
+      (ret = stream->sec_type->encode (stream, sec_stream, *head, tmp_head, cfg))) { goto getout; }
+
+  /* If the secondary compressor determines it is no good, it returns XD3_NOSECOND. */
+
+  /* Setup tmp_tail, comp_size */
+  tmp_tail  = tmp_head;
+  comp_size = tmp_head->next;
+
+  while (tmp_tail->next_page != NULL)
+    {
+      tmp_tail = tmp_tail->next_page;
+      comp_size += tmp_tail->next;
+    }
+
+  XD3_ASSERT (comp_size == xd3_sizeof_output (tmp_head));
+  XD3_ASSERT (tmp_tail != NULL);
+
+  if (comp_size < (orig_size - SECONDARY_MIN_SAVINGS))
+    {
+      IF_DEBUG1(P(RINT "secondary saved %u bytes: %u -> %u (%0.2f%%)\n",
+			 orig_size - comp_size, orig_size, comp_size,
+			 100.0 * (double) comp_size / (double) orig_size));
+
+      xd3_free_output (stream, *head);
+
+      *head = tmp_head;
+      *tail = tmp_tail;
+      *did_it = 1;
+    }
+  else
+    {
+    getout:
+      if (ret == XD3_NOSECOND) { ret = 0; }
+      xd3_free_output (stream, tmp_head);
+    }
+
+  return ret;
+}
+#endif /* XD3_ENCODER */
+#endif /* _XDELTA3_SECOND_H_ */
diff --git a/xdelta3/xdelta3-test.h b/xdelta3/xdelta3-test.h
new file mode 100755
index 0000000..198d440
--- /dev/null
+++ b/xdelta3/xdelta3-test.h
@@ -0,0 +1,2229 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2001, 2003, 2004, 2005, 2006.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <math.h>
+#include <sys/wait.h>
+
+#define MSG_IS(x) (stream->msg != NULL && strcmp ((x), stream->msg) == 0)
+
+static const usize_t TWO_MEGS_AND_DELTA = (2 << 20) + (1 << 10);
+static const usize_t ADDR_CACHE_ROUNDS = 10000;
+
+static const usize_t TEST_FILE_MEAN    = 16384;
+static const double TEST_ADD_MEAN     = 16;
+static const double TEST_ADD_MAX      = 256;
+static const double TEST_ADD_RATIO    = 0.1;
+static const double TEST_EPSILON      = 0.5;
+
+static char   TEST_TARGET_FILE[32];
+static char   TEST_SOURCE_FILE[32];
+static char   TEST_DELTA_FILE[32];
+static char   TEST_RECON_FILE[32];
+static char   TEST_RECON2_FILE[32];
+static char   TEST_COPY_FILE[32];
+
+static int TESTBUFSIZE = 1024 * 16;
+
+static int test_exponential_dist (usize_t mean, usize_t max);
+
+/* TODO
+ *
+ * 1. Test state changes: that config is called before open, open called before
+ * encode/decode..., close incomplete works, invalid options, consume_output always
+ * called, no mixing of encode/decode, etc.
+ *
+ * 2. Test window selection, window alignment, 1.5 pass alg vs. greedy
+ */
+
+/******************************************************************************************
+ TEST HELPERS
+ ******************************************************************************************/
+
+static void DOT (void) { P(RINT "."); }
+static int do_cmd (xd3_stream *stream, const char *buf)
+{
+  /* Run BUF through system().  Any non-zero status is reported as EINVAL, with
+   * stream->msg telling a non-zero exit apart from abnormal termination; on success
+   * DOT () is called and 0 is returned. */
+  int status = system (buf);
+
+  if (status == 0)
+    {
+      DOT ();
+      return 0;
+    }
+
+  stream->msg = WIFEXITED (status)
+    ? "command exited non-zero"
+    : "abnormal command termination";
+  return EINVAL;
+}
+static int do_fail (xd3_stream *stream, const char *buf)
+{
+  int ret;
+  ret = system (buf);
+  if (! WIFEXITED (ret) || WEXITSTATUS (ret) != 1)
+    {
+      stream->msg = "command should have not succeeded";
+      P(RINT "command was %s", buf);
+      return EINVAL;
+    }
+  DOT ();
+  return 0;
+}
+
+static int
+test_exponential_dist (usize_t mean, usize_t max)
+{
+  /* Sample an exponential distribution with the given MEAN, clamped to MAX.  drand48
+   * returns [0,1), so 1-u lies in (0,1] and the argument of log stays finite. */
+  double erand = log (1.0 / (1.0 - drand48 ()));
+  usize_t x = (usize_t) ((double) mean * erand + 0.5);
+  return min (x, max);
+}
+
+/* Test that the exponential distribution actually produces its mean. */
+static int
+test_random_numbers (xd3_stream *stream, int ignore)
+{
+  int i;
+  usize_t sum = 0;
+  usize_t mean = 50;
+  usize_t n_rounds = 10000;
+  double average, error;
+  double allowed_error = 1.0;
+
+  for (i = 0; i < n_rounds; i += 1)
+    {
+      sum += test_exponential_dist (mean, USIZE_T_MAX);
+    }
+
+  average = (double) sum / (double) n_rounds;
+  error   = average - (double) mean;
+
+  if (error < allowed_error && error > -allowed_error)
+    {
+      /*P(RINT "error is %f\n", error);*/
+      return 0;
+    }
+
+  stream->msg = "random distribution looks broken";
+  return EINVAL;
+}
+
+static int
+test_setup (void)
+{
+  static int x = 0;
+  x++;
+  //P(RINT "test setup: %d", x);
+  sprintf (TEST_TARGET_FILE, "/tmp/xdtest.target.%d", x);
+  sprintf (TEST_SOURCE_FILE, "/tmp/xdtest.source.%d", x);
+  sprintf (TEST_DELTA_FILE, "/tmp/xdtest.delta.%d", x);
+  sprintf (TEST_RECON_FILE, "/tmp/xdtest.recon.%d", x);
+  sprintf (TEST_RECON2_FILE, "/tmp/xdtest.recon2.%d", x);
+  sprintf (TEST_COPY_FILE, "/tmp/xdtest.copy.%d", x);
+  return 0;
+}
+
+static void
+test_unlink (char* file)
+{
+  /* Best-effort removal of FILE.  When unlink fails for a reason other than the file
+   * already being absent, fall back to "rm -f" once.  Retrying in a loop would spin
+   * forever, spawning a shell per pass, whenever rm cannot remove the file either. */
+  char buf[TESTBUFSIZE];
+
+  if (unlink (file) != 0 && errno != ENOENT)
+    {
+      snprintf (buf, sizeof (buf), "rm -f %s", file);
+      system (buf);
+    }
+}
+
+static void
+test_cleanup (void)
+{
+  static int x = 0;
+  x++;
+  //P(RINT "test cleanup: %d", x);  
+  test_unlink (TEST_TARGET_FILE);
+  test_unlink (TEST_SOURCE_FILE);
+  test_unlink (TEST_DELTA_FILE);
+  test_unlink (TEST_RECON_FILE);
+  test_unlink (TEST_RECON2_FILE);
+  test_unlink (TEST_COPY_FILE);
+}
+
+static int
+test_make_inputs (xd3_stream *stream, xoff_t *ss_out, xoff_t *ts_out)
+{
+  usize_t ts = (lrand48 () % TEST_FILE_MEAN) + TEST_FILE_MEAN;
+  usize_t ss = (lrand48 () % TEST_FILE_MEAN) + TEST_FILE_MEAN;
+  uint8_t *buf = malloc (ts + ss), *sbuf = buf /*, *tbuf = buf + ss*/;
+  usize_t sadd = 0, sadd_max = ss * TEST_ADD_RATIO;
+  FILE  *tf /*, *sf*/;
+  usize_t i, j;
+  int ret;
+
+  if (buf == NULL) { return ENOMEM; }
+
+  if ((tf = fopen (TEST_TARGET_FILE, "w")) == NULL)
+    {
+      stream->msg = "write failed";
+      ret = get_errno ();
+      goto failure;
+    }
+
+  /* Fill SBUF with SS bytes made of copies of earlier data and adds of random bytes.  The
+   * following logic produces the TEST_ADD_RATIO.  The variable SADD contains the number
+   * of bytes added so far, which should not exceed SADD_MAX. */
+  for (i = 0; i < ss; )
+    {
+      usize_t left = ss - i;
+      usize_t next = test_exponential_dist (TEST_ADD_MEAN, TEST_ADD_MAX);
+      usize_t add_left = (sadd < sadd_max) ? (sadd_max - sadd) : 0;
+      double add_prob = (left == 0) ? 0 : ((double) add_left / (double) left);
+
+      next = min (left, next);
+      if (i > 0 && (next > add_left || drand48 () >= add_prob))
+	{
+	  /* Copy */
+	  usize_t offset = lrand48 () % i;
+	  for (j = 0; j < next; j += 1)
+	    {
+	      sbuf[i++] = sbuf[offset + j];
+	    }
+	}
+      else
+	{
+	  /* Add */
+	  for (j = 0; j < next; j += 1)
+	    {
+	      sbuf[i++] = lrand48 ();
+	    }
+	  sadd += next;
+	}
+    }
+
+  if ((fwrite (sbuf, 1, ss, tf) != ss))
+    {
+      stream->msg = "write failed";
+      ret = get_errno ();
+      fclose (tf);
+      goto failure;
+    }
+
+  if ((ret = fclose (tf)) /* || (ret = fclose (sf))*/)
+    {
+      stream->msg = "close failed";
+      ret = get_errno ();
+      goto failure;
+    }
+
+  if (ts_out) { (*ts_out) = ts; } /* NOTE(review): only SS bytes were written, not TS */
+  if (ss_out) { (*ss_out) = ss; }
+
+ failure:
+  free (buf);
+  return ret;
+}
+
+static int
+compare_files (xd3_stream *stream, const char* tgt, const char *rec)
+{
+  /* Returns 0 when TGT and REC hold identical bytes; otherwise sets stream->msg and
+   * returns an error code.  Whatever was opened is closed again on every path. */
+  FILE *orig = NULL, *recons = NULL;
+  uint8_t obuf[TESTBUFSIZE], rbuf[TESTBUFSIZE];
+  size_t oc, rc;
+  int ret = 0;
+
+  if ((orig   = fopen (tgt, "r")) == NULL ||
+      (recons = fopen (rec, "r")) == NULL)
+    {
+      stream->msg = "read failed";
+      ret = get_errno ();
+      goto done;
+    }
+
+  for (;;)
+    {
+      oc = fread (obuf, 1, TESTBUFSIZE, orig);
+      rc = fread (rbuf, 1, TESTBUFSIZE, recons);
+
+      /* fread never returns a negative count; a failed read shows up in ferror. */
+      if (ferror (orig) || ferror (recons))
+	{
+	  stream->msg = "read failed";
+	  ret = get_errno ();
+	  break;
+	}
+
+      if (oc != rc)
+	{
+	  stream->msg = "compare files: different length";
+	  ret = EINVAL;
+	  break;
+	}
+
+      /* Both files reached end-of-file together without a mismatch. */
+      if (oc == 0) { break; }
+
+      if (memcmp (obuf, rbuf, oc) != 0)
+	{
+	  stream->msg = "compare files: different values";
+	  ret = EINVAL;
+	  break;
+	}
+    }
+
+ done:
+  if (orig   != NULL) { fclose (orig); }
+  if (recons != NULL) { fclose (recons); }
+  return ret;
+}
+
+static int
+test_save_copy (const char *origname)
+{
+  /* Copy ORIGNAME over TEST_COPY_FILE with "cp -f"; EINVAL if system() reports any
+   * non-zero status, 0 otherwise. */
+  char cmd[TESTBUFSIZE];
+
+  sprintf (cmd, "cp -f %s %s", origname, TEST_COPY_FILE);
+
+  if (system (cmd) == 0)
+    {
+      return 0;
+    }
+  return EINVAL;
+}
+
+static int
+test_file_size (const char* file, xoff_t *size)
+{
+  struct stat sbuf;
+  int ret;
+
+  if (stat (file, & sbuf) < 0)
+    {
+      ret = get_errno ();
+      P(RINT "xdelta3: stat failed: %s: %s\n", file, strerror (ret));
+      return ret;
+    }
+
+  if (! S_ISREG (sbuf.st_mode))
+    {
+      ret = EINVAL;
+      P(RINT "xdelta3: not a regular file: %s: %s\n", file, strerror (ret));
+      return ret;
+    }
+
+  (*size) = sbuf.st_size;
+  return 0;
+}
+
+/******************************************************************************************
+ READ OFFSET
+ ******************************************************************************************/
+
+/* Common test for read_integer errors: encodes a 64-bit value and then attempts to read
+ * as a 32-bit value.  If TRUNTO is non-zero, attempts to get errors by shortening the
+ * input, otherwise it should overflow.  Expects EINVAL and MSG. */
+static int
+test_read_integer_error (xd3_stream *stream, int trunto, const char *msg)
+{
+  uint64_t eval = (uint64_t) UINT32_MAX + 1ULL;
+  uint32_t rval;
+  xd3_output *buf = NULL;
+  const uint8_t *max;
+  const uint8_t *inp;
+  int ret;
+
+  buf = xd3_alloc_output (stream, buf);
+
+  if ((ret = xd3_emit_uint64_t (stream, & buf, eval)))
+    {
+      goto fail;
+    }
+
+ again:
+
+  inp = buf->base;
+  max = buf->base + buf->next - trunto;
+
+  if ((ret = xd3_read_uint32_t (stream, & inp, max, & rval)) != EINVAL || !MSG_IS (msg))
+    {
+      ret = EINVAL;
+    }
+  else if (trunto && trunto < buf->next)
+    {
+      trunto += 1;
+      goto again;
+    }
+  else
+    {
+      ret = 0;
+    }
+
+ fail:
+  xd3_free_output (stream, buf);
+  return ret;
+}
+
+/* Test integer overflow using the above routine. */
+static int
+test_decode_integer_overflow (xd3_stream *stream, int unused)
+{
+  return test_read_integer_error (stream, 0, "overflow in read_intger");
+}
+
+/* Test integer EOI using the above routine. */
+static int
+test_decode_integer_end_of_input (xd3_stream *stream, int unused)
+{
+  return test_read_integer_error (stream, 1, "end-of-input in read_integer");
+}
+
+/* Test that emit_integer/decode_integer/sizeof_integer/read_integer work on correct
+ * inputs.  Tests powers of (2^7), plus or minus one, up to the maximum value.
+ *
+ * The macro expands to a complete function body: it uses a variable named `stream' from
+ * the enclosing function and ends with `return ret' (the caller supplies the `;').
+ *
+ *   TYPE  integer type, also pasted into the xd3_{emit,sizeof,read,decode}_TYPE names
+ *   ONE   the constant 1 written with a suffix suitable for TYPE
+ *   MAX   the largest value of TYPE
+ *
+ * Every value is emitted twice: into rbuf, which is rewound before each value and checked
+ * right away with sizeof/read, and into dbuf, which accumulates all values and is replayed
+ * through xd3_decode_TYPE afterwards.  The result is 0 on success, ENOMEM if an output
+ * buffer cannot be allocated, EINVAL on a mismatch, or the error of a failing call. */
+#define TEST_ENCODE_DECODE_INTEGER(TYPE,ONE,MAX)                                \
+  xd3_output *rbuf = NULL;                                                      \
+  xd3_output *dbuf = NULL;                                                      \
+  TYPE values[64];                                                              \
+  int nvalues = 0;                                                              \
+  int i, ret = 0;                                                               \
+                                                                                \
+  /* Values at, just below and just above each power of 2^7. */                 \
+  for (i = 0; i < (sizeof (TYPE) * 8); i += 7)                                  \
+    {                                                                           \
+      values[nvalues++] = (ONE << i) - ONE;                                     \
+      values[nvalues++] = (ONE << i);                                           \
+      values[nvalues++] = (ONE << i) + ONE;                                     \
+    }                                                                           \
+                                                                                \
+  /* The two largest representable values. */                                   \
+  values[nvalues++] = MAX-ONE;                                                  \
+  values[nvalues++] = MAX;                                                      \
+                                                                                \
+  rbuf = xd3_alloc_output (stream, rbuf);                                       \
+  dbuf = xd3_alloc_output (stream, dbuf);                                       \
+                                                                                \
+  /* Allocation failure: nothing was emitted yet, just clean up. */             \
+  if (rbuf == NULL || dbuf == NULL)                                             \
+    {                                                                           \
+      ret = ENOMEM;                                                             \
+      goto fail;                                                                \
+    }                                                                           \
+                                                                                \
+  for (i = 0; i < nvalues; i += 1)                                              \
+    {                                                                           \
+      const uint8_t *max;                                                       \
+      const uint8_t *inp;                                                       \
+      TYPE val;                                                                 \
+                                                                                \
+      DOT ();                                                                   \
+      /* rbuf holds only the current value; dbuf keeps them all. */             \
+      rbuf->next = 0;                                                           \
+                                                                                \
+      if ((ret = xd3_emit_ ## TYPE (stream, & rbuf, values[i])) ||              \
+          (ret = xd3_emit_ ## TYPE (stream, & dbuf, values[i])))                \
+        {                                                                       \
+          goto fail;                                                            \
+        }                                                                       \
+                                                                                \
+      inp = rbuf->base;                                                         \
+      max = rbuf->base + rbuf->next;                                            \
+                                                                                \
+      if (rbuf->next != xd3_sizeof_ ## TYPE (values[i]))                        \
+        {                                                                       \
+          ret = EINVAL;                                                         \
+          goto fail;                                                            \
+        }                                                                       \
+                                                                                \
+      if ((ret = xd3_read_ ## TYPE (stream, & inp, max, & val)))                \
+        {                                                                       \
+          goto fail;                                                            \
+        }                                                                       \
+                                                                                \
+      if (val != values[i])                                                     \
+        {                                                                       \
+          ret = EINVAL;                                                         \
+          goto fail;                                                            \
+        }                                                                       \
+                                                                                \
+      DOT ();                                                                   \
+    }                                                                           \
+                                                                                \
+  /* Replay the accumulated encodings through the stream decoder. */            \
+  stream->next_in  = dbuf->base;                                                \
+  stream->avail_in = dbuf->next;                                                \
+                                                                                \
+  for (i = 0; i < nvalues; i += 1)                                              \
+    {                                                                           \
+      TYPE val;                                                                 \
+                                                                                \
+      if ((ret = xd3_decode_ ## TYPE (stream, & val)))                          \
+        {                                                                       \
+          goto fail;                                                            \
+        }                                                                       \
+                                                                                \
+      if (val != values[i])                                                     \
+        {                                                                       \
+          ret = EINVAL;                                                         \
+          goto fail;                                                            \
+        }                                                                       \
+    }                                                                           \
+                                                                                \
+  /* Every encoded byte must have been consumed. */                             \
+  if (stream->avail_in != 0)                                                    \
+    {                                                                           \
+      ret = EINVAL;                                                             \
+      goto fail;                                                                \
+    }                                                                           \
+                                                                                \
+ fail:                                                                          \
+  xd3_free_output (stream, rbuf);                                               \
+  xd3_free_output (stream, dbuf);                                               \
+                                                                                \
+  return ret
+
+/* Integer coding round-trip test for uint32_t.  The whole body, including the return
+ * statement, comes from TEST_ENCODE_DECODE_INTEGER; `unused' is not referenced. */
+static int
+test_encode_decode_uint32_t (xd3_stream *stream, int unused)
+{
+  TEST_ENCODE_DECODE_INTEGER(uint32_t,1U,UINT32_MAX);
+}
+
+/* Integer coding round-trip test for uint64_t.  The whole body, including the return
+ * statement, comes from TEST_ENCODE_DECODE_INTEGER; `unused' is not referenced. */
+static int
+test_encode_decode_uint64_t (xd3_stream *stream, int unused)
+{
+  TEST_ENCODE_DECODE_INTEGER(uint64_t,1ULL,UINT64_MAX);
+}
+
+/* Check USIZE_T_OVERFLOW against pairs whose sum is known to fit in a usize_t and pairs
+ * whose sum is known to wrap.  Returns 0 when every case is classified correctly,
+ * otherwise sets stream->msg and returns EINVAL.  `unused' is not referenced. */
+static int
+test_usize_t_overflow (xd3_stream *stream, int unused)
+{
+  /* Sums that fit: the macro must report no overflow for any of them. */
+  int fits_ok =
+    ! USIZE_T_OVERFLOW (0, 0) &&
+    ! USIZE_T_OVERFLOW (USIZE_T_MAX, 0) &&
+    ! USIZE_T_OVERFLOW (0, USIZE_T_MAX) &&
+    ! USIZE_T_OVERFLOW (USIZE_T_MAX / 2, 0) &&
+    ! USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2) &&
+    ! USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2 + 1);
+
+  /* Sums that wrap: the macro must report overflow for all of them. */
+  int wraps_ok =
+    USIZE_T_OVERFLOW (USIZE_T_MAX, 1) &&
+    USIZE_T_OVERFLOW (1, USIZE_T_MAX) &&
+    USIZE_T_OVERFLOW (USIZE_T_MAX / 2 + 1, USIZE_T_MAX / 2 + 1);
+
+  if (fits_ok && wraps_ok)
+    {
+      return 0;
+    }
+
+  stream->msg = "incorrect overflow computation";
+  return EINVAL;
+}
+
+/******************************************************************************************
+ Address cache
+ ******************************************************************************************/
+
+/* Exercise the address cache.  ADDR_CACHE_ROUNDS-1 pseudo-random copy addresses are
+ * encoded, the output pages reachable from ADDR_HEAD (stream) are copied into one
+ * contiguous buffer, and the addresses are decoded again from that buffer.  The test
+ * requires that every address round-trips, that every encoded byte is consumed, and that
+ * each of the 2 + s_same + s_near address modes was chosen at least once.
+ *
+ * Returns 0 on success, ENOMEM if a temporary buffer cannot be allocated, EINVAL with
+ * stream->msg set on a mismatch, or the error of a failing encode/decode call.  The
+ * temporary buffers are released on every path once xd3_encode_init() has succeeded.
+ * `unused' is not referenced. */
+static int
+test_address_cache (xd3_stream *stream, int unused)
+{
+  int ret, i;
+  usize_t offset;
+  usize_t *addrs = NULL;
+  uint8_t *big_buf = NULL, *buf_max;
+  const uint8_t *buf;
+  xd3_output *outp;
+  uint8_t *modes = NULL;
+  int mode_counts[16];
+
+  stream->acache.s_near = stream->code_table_desc->near_modes;
+  stream->acache.s_same = stream->code_table_desc->same_modes;
+
+  if ((ret = xd3_encode_init (stream))) { return ret; }
+
+  addrs = xd3_alloc (stream, sizeof (usize_t), ADDR_CACHE_ROUNDS);
+  modes = xd3_alloc (stream, sizeof (uint8_t), ADDR_CACHE_ROUNDS);
+
+  if (addrs == NULL || modes == NULL)
+    {
+      ret = ENOMEM;
+      goto fail;
+    }
+
+  memset (mode_counts, 0, sizeof (mode_counts));
+  memset (modes, 0, ADDR_CACHE_ROUNDS);
+
+  addrs[0] = 0;
+
+  /* Fixed seed: the address sequence is the same on every run. */
+  srand48 (0x9f73f7fc);
+
+  /* First pass: encode addresses */
+  xd3_init_cache (& stream->acache);
+
+  for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1)
+    {
+      double p;
+      usize_t addr;
+      usize_t prev_i;
+      usize_t nearby;
+
+      /* The order of these three draws determines the sequence; do not reorder. */
+      p         = drand48 ();
+      prev_i    = lrand48 () % offset;
+      nearby    = (lrand48 () % 256) % offset;
+      nearby    = max (1U, nearby);
+
+      /* 10%: repeat a recently used address; 30%: an address close to an earlier one
+       * (clamped below offset); otherwise: an arbitrary earlier position. */
+      if (p < 0.1)      { addr = addrs[offset-nearby]; }
+      else if (p < 0.4) { addr = min (addrs[prev_i] + nearby, offset-1); }
+      else              { addr = prev_i; }
+
+      if ((ret = xd3_encode_address (stream, addr, offset, & modes[offset]))) { goto fail; }
+
+      addrs[offset] = addr;
+      mode_counts[modes[offset]] += 1;
+    }
+
+  /* Copy addresses into a contiguous buffer. */
+  if ((big_buf = xd3_alloc (stream, xd3_sizeof_output (ADDR_HEAD (stream)), 1)) == NULL)
+    {
+      ret = ENOMEM;
+      goto fail;
+    }
+
+  for (offset = 0, outp = ADDR_HEAD (stream); outp != NULL; offset += outp->next, outp = outp->next_page)
+    {
+      memcpy (big_buf + offset, outp->base, outp->next);
+    }
+
+  buf_max = big_buf + offset;
+  buf     = big_buf;
+
+  /* Second pass: decode addresses */
+  xd3_init_cache (& stream->acache);
+
+  for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1)
+    {
+      usize_t addr;
+
+      if ((ret = xd3_decode_address (stream, offset, modes[offset], & buf, buf_max, & addr))) { goto fail; }
+
+      if (addr != addrs[offset])
+        {
+          stream->msg = "incorrect decoded address";
+          ret = EINVAL;
+          goto fail;
+        }
+    }
+
+  /* Check that every byte, mode was used. */
+  if (buf != buf_max)
+    {
+      stream->msg = "address bytes not used";
+      ret = EINVAL;
+      goto fail;
+    }
+
+  for (i = 0; i < (2 + stream->acache.s_same + stream->acache.s_near); i += 1)
+    {
+      if (mode_counts[i] == 0)
+        {
+          stream->msg = "address mode not used";
+          ret = EINVAL;
+          goto fail;
+        }
+    }
+
+  ret = 0;
+
+ fail:
+  /* Shared exit for success and failure; pointers still NULL were never allocated. */
+  xd3_free (stream, modes);
+  xd3_free (stream, addrs);
+  xd3_free (stream, big_buf);
+
+  return ret;
+}
+
+/******************************************************************************************
+ Encode and decode with single bit error
+ ******************************************************************************************/
+
+/* It compresses from 256 to around 185 bytes.
+ * Avoids matching addresses that are a single-bit difference.
+ * Avoids matching address 0. */
+static const uint8_t test_text[] =
+"this is a story\n"
+"abouttttttttttt\n"
+"- his is a stor\n"
+"- about nothing "
+" all. boutique -"
+"his story is a -"
+"about           "
+"what happens all"
+" the time what -"
+"am I ttttttt the"
+" person said, so"
+" what, per son -"
+" gory story is -"
+" about nothing -"
+"tttttt to test -"
+"his sto nothing";
+
+static const uint8_t test_apphead[] = "header test";
+
+/* Encode test_text, with test_apphead as application header, into `encoded' and store
+ * the resulting length in *encoded_size.  The output limit passed to the encoder is
+ * 4*sizeof (test_text).  XD3_FLUSH is set for the duration of the encode; afterwards the
+ * stream is freed and configured again from the flags it had on entry, whether or not
+ * encoding succeeded.  Returns 0 or the first error from encoding/closing. */
+static int
+test_compress_text (xd3_stream  *stream,
+		    uint8_t     *encoded,
+		    usize_t      *encoded_size)
+{
+  const int saved_flags = stream->flags;
+  xd3_config fresh_cfg;
+  int status;
+
+  stream->flags |= XD3_FLUSH;
+  *encoded_size = 0;
+
+  xd3_set_appheader (stream, test_apphead, sizeof (test_apphead));
+
+  status = xd3_encode_completely (stream, test_text, sizeof (test_text),
+                                  encoded, encoded_size, 4*sizeof (test_text));
+
+  /* Only close the stream when the encode itself went through. */
+  if (status == 0)
+    {
+      status = xd3_close_stream (stream);
+    }
+
+  /* Hand the stream back in a freshly configured state. */
+  xd3_free_stream (stream);
+  xd3_init_config (& fresh_cfg, saved_flags);
+  xd3_config_stream (stream, & fresh_cfg);
+
+  return status;
+}
+
+/* Decode the enc_size bytes at `enc', supplying at most test_desize input bytes per
+ * xd3_avail_input() call, and verify the result against test_text and test_apphead.
+ *
+ * The decoder is expected to produce exactly one output of sizeof (test_text) bytes,
+ * then report XD3_WINFINISH, then close cleanly.  Returns 0 on success; EINVAL for a
+ * wrong output size or application header; EIO when the decoded bytes differ from
+ * test_text; otherwise the non-zero value returned by the decoder (including
+ * XD3_INPUT when the input ran out and XD3_WINFINISH when no output was produced).
+ *
+ * On every path the stream is freed and configured again from the flags it had on
+ * entry; stream->msg is carried over so the caller can still inspect it. */
+static int
+test_decompress_text (xd3_stream *stream, uint8_t *enc, usize_t enc_size, usize_t test_desize)
+{
+  xd3_config cfg;
+  char decoded[sizeof (test_text)];
+  uint8_t *apphead;
+  usize_t apphead_size;
+  usize_t decoded_size;
+  const char *msg;
+  int  ret;
+  usize_t pos = 0;
+  int flags = stream->flags;
+  usize_t take;
+
+ input:
+  /* Test decoding test_desize input bytes at a time */
+  take = min (enc_size - pos, test_desize);
+  XD3_ASSERT (take > 0);
+
+  xd3_avail_input (stream, enc + pos, take);
+ again:
+  ret = xd3_decode_input (stream);
+
+  /* Account for the chunk just supplied.  `take' is cleared so that re-entering at
+   * `again' (no new input supplied) does not advance `pos' a second time. */
+  pos += take;
+  take = 0;
+
+  switch (ret)
+    {
+    case XD3_OUTPUT:
+      break;
+    case XD3_WINSTART:
+    case XD3_GOTHEADER:
+      /* Informational returns: call the decoder again without new input. */
+      goto again;
+    case XD3_INPUT:
+      if (pos < enc_size) { goto input; }
+      /* else fallthrough */
+    case XD3_WINFINISH:
+    default:
+      /* Input exhausted, window finished without output, or a decoder error: `ret'
+       * is non-zero here and is returned as is. */
+      goto fail;
+    }
+
+  XD3_ASSERT (ret == XD3_OUTPUT);
+  XD3_ASSERT (pos == enc_size);
+
+  if (stream->avail_out != sizeof (test_text))
+    {
+      stream->msg = "incorrect output size";
+      ret = EINVAL;
+      goto fail;
+    }
+
+  /* The size check above guarantees the copy fits in `decoded'. */
+  decoded_size = stream->avail_out;
+  memcpy (decoded, stream->next_out, stream->avail_out);
+
+  xd3_consume_output (stream);
+
+  if ((ret = xd3_get_appheader (stream, & apphead, & apphead_size))) { goto fail; }
+
+  if (apphead_size != sizeof (test_apphead) || memcmp (apphead, test_apphead, sizeof (test_apphead)) != 0)
+    {
+      stream->msg = "incorrect appheader";
+      ret = EINVAL;
+      goto fail;
+    }
+
+  /* After the single output the decoder must report the end of the window and the
+   * stream must close without error; `ret' is 0 once both conditions held. */
+  if ((ret = xd3_decode_input (stream)) != XD3_WINFINISH ||
+      (ret = xd3_close_stream (stream)) != 0)
+    {
+      goto fail;
+    }
+
+  /* EIO is reserved for this content mismatch so that callers can tell it apart from
+   * the other failures. */
+  if (decoded_size != sizeof (test_text) || memcmp (decoded, test_text, sizeof (test_text)) != 0)
+    {
+      stream->msg = "incorrect output text";
+      ret = EIO;
+    }
+
+ fail:
+  /* Shared exit, also reached on success.  Save the message before the stream is
+   * freed and reconfigured, then put it back. */
+  msg = stream->msg;
+  xd3_free_stream (stream);
+  xd3_init_config (& cfg, flags);
+  xd3_config_stream (stream, & cfg);
+  stream->msg = msg;
+
+  return ret;
+}
+
+/* Encode test_text once, then decode it again with every single bit of the encoding
+ * inverted in turn.  A corrupted decode that still succeeds is counted as a
+ * "non-failure"; the total must equal expected_non_failures.  The unmodified encoding is
+ * decoded before the loop (sizeof (test_text) / 4 input bytes at a time) and after it
+ * (1 byte at a time).
+ *
+ * Returns 0 on success; the error of the error-free encode/decode if one of those
+ * fails; EINVAL when the non-failure count is unexpected, or when XD3_ADLER32 is set and
+ * a corrupted decode produced wrong text (EIO from test_decompress_text). */
+static int
+test_decompress_single_bit_error (xd3_stream *stream, int expected_non_failures)
+{
+  int ret;
+  int i;
+  uint8_t encoded[4*sizeof (test_text)]; /* make room for alt code table */
+  usize_t  encoded_size;
+  int non_failures = 0;
+  int cksum = (stream->flags & XD3_ADLER32) != 0;
+
+  /* TEST_FAILURES() is a no-op in the enabled branch; the disabled branch writes the
+   * text and each non-failing encoding to files for inspection by hand. */
+#if 1
+#define TEST_FAILURES()
+#else
+  /* For checking non-failure cases by hand, enable this macro and run xdelta printdelta
+   * with print_cpymode enabled.  Every non-failure should change a copy address mode,
+   * which doesn't cause a failure because the address cache starts out with all zeros.
+
+    ./xdelta3 test
+    for i in test_text.xz.*; do ./xdelta3 printdelta $i > $i.out; diff $i.out test_text.xz.0.out; done
+
+   */
+  system ("rm -rf test_text.*");
+  {
+    char buf[64];
+    FILE *f;
+    sprintf (buf, "test_text");
+    f = fopen (buf, "w");
+    fwrite (test_text,1,sizeof (test_text),f);
+    fclose (f);
+  }
+#define TEST_FAILURES()                                                         \
+  do {                                                                          \
+    char buf[64];                                                               \
+    FILE *f;                                                                    \
+    sprintf (buf, "test_text.xz.%d", non_failures);                             \
+    f = fopen (buf, "w");                                                       \
+    fwrite (encoded,1,encoded_size,f);                                          \
+    fclose (f);                                                                 \
+  } while (0)
+#endif
+
+  /* NOTE(review): presumably makes the secondary compressors keep their output even
+   * when it is not smaller than the input -- confirm against the encoder. */
+  stream->sec_data.inefficient = 1;
+  stream->sec_inst.inefficient = 1;
+  stream->sec_addr.inefficient = 1;
+
+  /* Encode text, test correct input */
+  if ((ret = test_compress_text (stream, encoded, & encoded_size)))
+    {
+      /*stream->msg = "without error: encode failure";*/
+      return ret;
+    }
+  if ((ret = test_decompress_text (stream, encoded, encoded_size, sizeof (test_text) / 4)))
+    {
+      /*stream->msg = "without error: decode failure";*/
+      return ret;
+    }
+
+  TEST_FAILURES();
+
+  /* `i' indexes bits: byte i/8, bit i%8 within that byte. */
+  for (i = 0; i < encoded_size*8; i += 1)
+    {
+      /* Single bit error. */
+      encoded[i/8] ^= 1 << (i%8);
+
+      if ((ret = test_decompress_text (stream, encoded, encoded_size, sizeof (test_text))) == 0)
+	{
+	  non_failures += 1;
+	  /*P(RINT "%u[%u] non-failure %u\n", i/8, i%8, non_failures);*/
+	  TEST_FAILURES();
+	}
+      else
+	{
+	  /*P(RINT "%u[%u] failure: %s\n", i/8, i%8, stream->msg);*/
+	}
+
+      /* decompress_text returns EIO when the final memcmp() fails, but that
+       * should never happen with checksumming on. */
+      if (cksum && ret == EIO)
+	{
+	  /*P(RINT "%u[%u] cksum mismatch\n", i/8, i%8);*/
+	  stream->msg = "checksum mismatch";
+	  return EINVAL;
+	}
+
+      /* Undo single bit error. */
+      encoded[i/8] ^= 1 << (i%8);
+    }
+
+  /* Test correct input again */
+  if ((ret = test_decompress_text (stream, encoded, encoded_size, 1)))
+    {
+      /*stream->msg = "without error: decode failure";*/
+      return ret;
+    }
+
+  /* Check expected non-failures */
+  if (non_failures != expected_non_failures)
+    {
+      P(RINT "non-failures %u; expected %u", non_failures, expected_non_failures);
+      stream->msg = "incorrect";
+      return EINVAL;
+    }
+
+  DOT ();
+
+  return 0;
+}
+
+/******************************************************************************************
+ Secondary compression tests
+ ******************************************************************************************/
+
+#if SECONDARY_ANY
+typedef int (*sec_dist_func) (xd3_stream *stream, xd3_output *data);
+
+static int sec_dist_func1 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func2 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func3 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func4 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func5 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func6 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func7 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func8 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func9 (xd3_stream *stream, xd3_output *data);
+
+static sec_dist_func sec_dists[] =
+{
+  sec_dist_func1,
+  sec_dist_func2,
+  sec_dist_func3,
+  sec_dist_func4,
+  sec_dist_func5,
+  sec_dist_func6,
+  sec_dist_func7,
+  sec_dist_func8,
+  sec_dist_func9,
+};
+
+/* Test distribution: 100 bytes of the same character (13).  Returns 0, or the first
+ * error from xd3_emit_byte. */
+static int
+sec_dist_func1 (xd3_stream *stream, xd3_output *data)
+{
+  int remaining = 100;
+  int err;
+
+  while (remaining > 0)
+    {
+      err = xd3_emit_byte (stream, & data, 13);
+
+      if (err != 0)
+        {
+          return err;
+        }
+
+      remaining -= 1;
+    }
+
+  return 0;
+}
+
+/* Test distribution: uniform covering half the alphabet.  ALPHABET_SIZE bytes are
+ * emitted, cycling through the symbols below ALPHABET_SIZE/2.  Returns 0, or the first
+ * error from xd3_emit_byte. */
+static int
+sec_dist_func2 (xd3_stream *stream, xd3_output *data)
+{
+  int count = 0;
+  int err;
+
+  while (count < ALPHABET_SIZE)
+    {
+      err = xd3_emit_byte (stream, & data, count%(ALPHABET_SIZE/2));
+
+      if (err != 0)
+        {
+          return err;
+        }
+
+      count += 1;
+    }
+
+  return 0;
+}
+
+/* Test distribution: uniform covering the entire alphabet.  Each symbol from 0 to
+ * ALPHABET_SIZE-1 is emitted once, in increasing order.  Returns 0, or the first error
+ * from xd3_emit_byte. */
+static int
+sec_dist_func3 (xd3_stream *stream, xd3_output *data)
+{
+  int sym = 0;
+  int err;
+
+  while (sym < ALPHABET_SIZE)
+    {
+      /* sym is already in [0, ALPHABET_SIZE), so no reduction is needed. */
+      err = xd3_emit_byte (stream, & data, sym);
+
+      if (err != 0)
+        {
+          return err;
+        }
+
+      sym += 1;
+    }
+
+  return 0;
+}
+
+/* Test distribution: an exponential distribution covering half the alphabet.  Emits
+ * ALPHABET_SIZE*20 bytes drawn from test_exponential_dist (10, ALPHABET_SIZE/2).
+ * Returns 0, or the first error from xd3_emit_byte. */
+static int
+sec_dist_func4 (xd3_stream *stream, xd3_output *data)
+{
+  int left = ALPHABET_SIZE*20;
+  int err, sym;
+
+  while (left > 0)
+    {
+      sym = test_exponential_dist (10, ALPHABET_SIZE/2);
+      err = xd3_emit_byte (stream, & data, sym);
+
+      if (err != 0)
+        {
+          return err;
+        }
+
+      left -= 1;
+    }
+
+  return 0;
+}
+
+/* Test distribution: an exponential distribution covering the entire alphabet.  Emits
+ * ALPHABET_SIZE*20 bytes drawn from test_exponential_dist (10, ALPHABET_SIZE-1).
+ * Returns 0, or the first error from xd3_emit_byte. */
+static int
+sec_dist_func5 (xd3_stream *stream, xd3_output *data)
+{
+  int left = ALPHABET_SIZE*20;
+  int err, sym;
+
+  while (left > 0)
+    {
+      sym = test_exponential_dist (10, ALPHABET_SIZE-1);
+      err = xd3_emit_byte (stream, & data, sym);
+
+      if (err != 0)
+        {
+          return err;
+        }
+
+      left -= 1;
+    }
+
+  return 0;
+}
+
+/* Test distribution: a uniform random distribution covering half the alphabet.  Emits
+ * ALPHABET_SIZE*20 bytes, each lrand48 () reduced modulo ALPHABET_SIZE/2.  Returns 0,
+ * or the first error from xd3_emit_byte. */
+static int
+sec_dist_func6 (xd3_stream *stream, xd3_output *data)
+{
+  int left = ALPHABET_SIZE*20;
+  int err, sym;
+
+  while (left > 0)
+    {
+      sym = lrand48 () % (ALPHABET_SIZE/2);
+      err = xd3_emit_byte (stream, & data, sym);
+
+      if (err != 0)
+        {
+          return err;
+        }
+
+      left -= 1;
+    }
+
+  return 0;
+}
+
+/* Test distribution: a uniform random distribution covering the entire alphabet.  Emits
+ * ALPHABET_SIZE*20 bytes, each lrand48 () reduced modulo ALPHABET_SIZE.  Returns 0, or
+ * the first error from xd3_emit_byte. */
+static int
+sec_dist_func7 (xd3_stream *stream, xd3_output *data)
+{
+  int left = ALPHABET_SIZE*20;
+  int err, sym;
+
+  while (left > 0)
+    {
+      sym = lrand48 () % ALPHABET_SIZE;
+      err = xd3_emit_byte (stream, & data, sym);
+
+      if (err != 0)
+        {
+          return err;
+        }
+
+      left -= 1;
+    }
+
+  return 0;
+}
+
+/* Test distribution: a small number of frequent characters, difficult to divide into
+ * many groups.  The four symbols 0, 64, 128, 255 are emitted in that order,
+ * ALPHABET_SIZE*5 times over.  Returns 0, or the first error from xd3_emit_byte. */
+static int
+sec_dist_func8 (xd3_stream *stream, xd3_output *data)
+{
+  static const int symbols[4] = { 0, 64, 128, 255 };
+  int round, k, err;
+
+  for (round = 0; round < ALPHABET_SIZE*5; round += 1)
+    {
+      for (k = 0; k < 4; k += 1)
+        {
+          err = xd3_emit_byte (stream, & data, symbols[k]);
+
+          if (err != 0)
+            {
+              return err;
+            }
+        }
+    }
+
+  return 0;
+}
+
+/* Test distribution: one that causes many FGK block promotions (found a bug).
+ *
+ * Emits ALPHABET_SIZE*200 bytes in two phases.  Ramp phase: symbol N is emitted N+1
+ * times, for N = 0, 1, ... ALPHABET_SIZE-1.  Promotion phase: one symbol is emitted
+ * ALPHABET_SIZE times in a row, then the next symbol (wrapping around), and so on.
+ * Returns 0, or the first error from xd3_emit_byte. */
+static int
+sec_dist_func9 (xd3_stream *stream, xd3_output *data)
+{
+  int n, err;
+
+  int ramp_sym  = 0;   /* symbol currently being ramped */
+  int ramp_done = 0;   /* copies of ramp_sym emitted so far */
+  int prom_sym  = 0;   /* symbol currently being promoted */
+  int prom_done = 0;   /* copies of prom_sym emitted in this run */
+
+  /* 200 was long enough to trigger it--only when stricter checking that counted all
+   * blocks was turned on, but it seems I deleted this code. (missing fgk_free_block on
+   * line 398). */
+  for (n = 0; n < ALPHABET_SIZE*200; n += 1)
+    {
+      /* Current ramp symbol has had its N+1 copies: move on to the next symbol. */
+      if (ramp_sym < ALPHABET_SIZE && ramp_done > ramp_sym)
+        {
+          ramp_sym  += 1;
+          ramp_done  = 0;
+        }
+
+      /* Initially Nth symbol has (N+1) frequency */
+      if (ramp_sym < ALPHABET_SIZE)
+        {
+          ramp_done += 1;
+          err = xd3_emit_byte (stream, & data, ramp_sym);
+          if (err != 0) { return err; }
+          continue;
+        }
+
+      /* Thereafter, promote least freq to max freq */
+      if (prom_done == ALPHABET_SIZE)
+        {
+          prom_done = 0;
+          prom_sym  = (prom_sym + 1) % ALPHABET_SIZE;
+        }
+
+      prom_done += 1;
+      err = xd3_emit_byte (stream, & data, prom_sym);
+      if (err != 0) { return err; }
+    }
+
+  return 0;
+}
+
+/* Decode compress_size bytes at dec_input with the secondary compressor `sec' into
+ * dec_output, and require that the decoder consumes all input, produces exactly
+ * input_size bytes, and that those bytes equal dec_correct.  Returns 0 on success,
+ * ENOMEM if the decoder state cannot be allocated, the decoder's own error, or EINVAL
+ * with stream->msg set when one of the checks fails. */
+static int
+test_secondary_decode (xd3_stream         *stream,
+		       const xd3_sec_type *sec,
+		       usize_t              input_size,
+		       usize_t              compress_size,
+		       const uint8_t      *dec_input,
+		       const uint8_t      *dec_correct,
+		       uint8_t            *dec_output)
+{
+  int ret;
+  xd3_sec_stream *dec_stream = sec->alloc (stream);
+  const uint8_t *in_pos  = dec_input;
+  const uint8_t *in_end  = dec_input + compress_size;
+  uint8_t       *out_pos = dec_output;
+  uint8_t       *out_end = dec_output + input_size;
+
+  if (dec_stream == NULL)
+    {
+      return ENOMEM;
+    }
+
+  sec->init (dec_stream);
+
+  ret = sec->decode (stream, dec_stream, & in_pos, in_end, & out_pos, out_end);
+
+  if (ret != 0)
+    {
+      /* The decoder failed; report its error unchanged. */
+    }
+  else if (in_pos != in_end)
+    {
+      stream->msg = "unused input";
+      ret = EINVAL;
+    }
+  else if (out_pos != out_end)
+    {
+      stream->msg = "unfinished output";
+      ret = EINVAL;
+    }
+  else if (memcmp (dec_output, dec_correct, input_size) != 0)
+    {
+      stream->msg = "incorrect output";
+      ret = EINVAL;
+    }
+
+  sec->destroy (stream, dec_stream);
+  return ret;
+}
+
+/* Run the secondary compressor `sec' over every distribution in sec_dists, once for
+ * each group count from 1 to `groups'.  Each generated input is encoded, decoded again
+ * through test_secondary_decode(), and then decoded with single-bit errors applied to
+ * the first (at most) 10 compressed bytes; the outcome of the corrupted decodes is
+ * ignored.  The compressed size in bits per input byte is printed for each
+ * distribution.  Returns 0, ENOMEM, or the first encode/decode error. */
+static int
+test_secondary (xd3_stream *stream, const xd3_sec_type *sec, int groups)
+{
+  int test_i, ret;
+  xd3_output *in_head, *out_head, *p;
+  usize_t p_off, input_size, compress_size;
+  uint8_t *dec_input = NULL, *dec_output = NULL, *dec_correct = NULL;
+  xd3_sec_stream *enc_stream;
+  xd3_sec_cfg cfg;
+
+  memset (& cfg, 0, sizeof (cfg));
+
+  cfg.inefficient = 1;
+
+  for (cfg.ngroups = 1; cfg.ngroups <= groups; cfg.ngroups += 1)
+    {
+      P(RINT "\n...");
+      for (test_i = 0; test_i < SIZEOF_ARRAY (sec_dists); test_i += 1)
+	{
+	  /* Re-seed so the random distributions are the same for every group count. */
+	  srand48 (0x84687674);
+
+	  in_head  = xd3_alloc_output (stream, NULL);
+	  out_head = xd3_alloc_output (stream, NULL);
+	  enc_stream = sec->alloc (stream);
+	  /* Reset so the cleanup below never frees a pointer from an earlier round. */
+	  dec_input = NULL;
+	  dec_output = NULL;
+	  dec_correct = NULL;
+
+	  /* NOTE(review): on this path the cleanup calls sec->destroy() with a possibly
+	   * NULL enc_stream -- confirm that every destroy implementation accepts NULL. */
+	  if (in_head == NULL || out_head == NULL || enc_stream == NULL) { goto nomem; }
+
+	  if ((ret = sec_dists[test_i] (stream, in_head))) { goto fail; }
+
+	  sec->init (enc_stream);
+
+	  /* Encode data */
+	  if ((ret = sec->encode (stream, enc_stream, in_head, out_head, & cfg)))
+	    {
+	      P(RINT "test %u: encode: %s", test_i, stream->msg);
+	      goto fail;
+	    }
+
+	  /* Calculate sizes, allocate contiguous arrays for decoding */
+	  input_size    = xd3_sizeof_output (in_head);
+	  compress_size = xd3_sizeof_output (out_head);
+
+	  /* Compressed bits per input byte. */
+	  P(RINT "%.3f", 8.0 * (double) compress_size / (double) input_size);
+
+	  if ((dec_input   = xd3_alloc (stream, compress_size, 1)) == NULL ||
+	      (dec_output  = xd3_alloc (stream, input_size, 1)) == NULL ||
+	      (dec_correct = xd3_alloc (stream, input_size, 1)) == NULL) { goto nomem; }
+
+	  /* Fill the compressed data array */
+	  for (p_off = 0, p = out_head; p != NULL; p_off += p->next, p = p->next_page)
+	    {
+	      memcpy (dec_input + p_off, p->base, p->next);
+	    }
+
+	  XD3_ASSERT (p_off == compress_size);
+
+	  /* Fill the input data array */
+	  for (p_off = 0, p = in_head; p != NULL; p_off += p->next, p = p->next_page)
+	    {
+	      memcpy (dec_correct + p_off, p->base, p->next);
+	    }
+
+	  XD3_ASSERT (p_off == input_size);
+
+	  if ((ret = test_secondary_decode (stream, sec, input_size, compress_size, dec_input, dec_correct, dec_output)))
+	    {
+	      P(RINT "test %u: decode: %s", test_i, stream->msg);
+	      goto fail;
+	    }
+
+	  /* Single-bit error test, only cover the first 10 bytes.  Some non-failures are
+	   * expected in the Huffman case: Changing the clclen array, for example, may not
+	   * harm the decoding.  Really looking for faults here. */
+	  {
+	    int i;
+	    int bytes = min (compress_size, 10U);
+	    for (i = 0; i < bytes * 8; i += 1)
+	      {
+		/* Flip bit i%8 of byte i/8, decode, then flip it back. */
+		dec_input[i/8] ^= 1 << (i%8);
+
+		if ((ret = test_secondary_decode (stream, sec, input_size, compress_size, dec_input, dec_correct, dec_output)) == 0)
+		  {
+		    /*P(RINT "test %u: decode single-bit [%u/%u] error non-failure", test_i, i/8, i%8);*/
+		  }
+
+		dec_input[i/8] ^= 1 << (i%8);
+
+		/* One progress dot per 2*bytes corrupted decodes. */
+		if ((i % (2*bytes)) == (2*bytes)-1)
+		  {
+		    DOT ();
+		  }
+	      }
+	    /* Decode errors from corrupted input are expected; do not report them. */
+	    ret = 0;
+	  }
+
+	  /* Reached only through `goto nomem'; normal flow skips the assignment. */
+	  if (0) { nomem: ret = ENOMEM; }
+
+	fail:
+	  /* Per-distribution cleanup, run on success as well as on failure. */
+	  sec->destroy (stream, enc_stream);
+	  xd3_free_output (stream, in_head);
+	  xd3_free_output (stream, out_head);
+	  xd3_free (stream, dec_input);
+	  xd3_free (stream, dec_output);
+	  xd3_free (stream, dec_correct);
+
+	  if (ret != 0) { return ret; }
+	}
+    }
+
+  return 0;
+}
+
+/* test_secondary() with fgk_sec_type; `gp' is the largest group count to try.  Emitted
+ * through IF_FGK (presumably only when the FGK coder is compiled in -- confirm). */
+IF_FGK (static int test_secondary_fgk  (xd3_stream *stream, int gp) { return test_secondary (stream, & fgk_sec_type, gp); })
+/* test_secondary() with djw_sec_type; `gp' is the largest group count to try.  Emitted
+ * through IF_DJW (presumably only when the DJW coder is compiled in -- confirm). */
+IF_DJW (static int test_secondary_huff (xd3_stream *stream, int gp) { return test_secondary (stream, & djw_sec_type, gp); })
+#endif
+
+/******************************************************************************************
+ TEST INSTRUCTION TABLE
+ ******************************************************************************************/
+
+/* Test that xd3_choose_instruction() does the right thing for its code table: for each
+ * of the 256 table entries, build the instruction (or instruction pair) the entry
+ * describes and check that XD3_CHOOSE_INSTRUCTION selects that same entry.  Returns 0,
+ * or EINVAL with stream->msg set.  `ignore' is not referenced. */
+static int
+test_choose_instruction (xd3_stream *stream, int ignore)
+{
+  int i;
+
+  stream->code_table = (*stream->code_table_func) ();
+
+  for (i = 0; i < 256; i += 1)
+    {
+      const xd3_dinst *entry = & stream->code_table[i];
+      xd3_rinst prev, inst;
+
+      XD3_ASSERT (entry->type1 > 0);
+
+      memset (& prev, 0, sizeof (prev));
+      memset (& inst, 0, sizeof (inst));
+
+      if (entry->type2 != 0)
+        {
+          /* Double instruction: the chosen code is recorded in prev.code2. */
+          prev.type = entry->type1;
+          prev.size = entry->size1;
+          inst.type = entry->type2;
+          inst.size = entry->size2;
+
+          XD3_CHOOSE_INSTRUCTION (stream, & prev, & inst);
+
+          if (prev.code2 != i)
+            {
+              stream->msg = "wrong double instruction";
+              return EINVAL;
+            }
+
+          continue;
+        }
+
+      /* Single instruction.  A table size of 0 is replaced by TESTBUFSIZE before the
+       * lookup. */
+      inst.type = entry->type1;
+      inst.size = entry->size1;
+
+      if (inst.size == 0)
+        {
+          inst.size = TESTBUFSIZE;
+        }
+
+      XD3_CHOOSE_INSTRUCTION (stream, NULL, & inst);
+
+      if (inst.code2 != 0 || inst.code1 != i)
+        {
+          stream->msg = "wrong single instruction";
+          return EINVAL;
+        }
+    }
+
+  return 0;
+}
+
+/******************************************************************************************
+ TEST INSTRUCTION TABLE CODING
+ ******************************************************************************************/
+
+#if GENERIC_ENCODE_TABLES
+/* Test that encoding and decoding a code table works: compute the encoding of the
+ * alternate code table, apply it to the stream, and compare the stream's resulting
+ * table with xd3_alternate_code_table ().  Returns 0, the first error from the two
+ * table calls, or EINVAL with stream->msg set.  `ignore' is not referenced. */
+static int
+test_encode_code_table (xd3_stream *stream, int ignore)
+{
+  const uint8_t *comp_data;
+  usize_t comp_size;
+  int ret;
+
+  ret = xd3_compute_alternate_table_encoding (stream, & comp_data, & comp_size);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  /* Set the address cache sizes to those of the alternate table before applying it. */
+  stream->acache.s_near = __alternate_code_table_desc.near_modes;
+  stream->acache.s_same = __alternate_code_table_desc.same_modes;
+
+  ret = xd3_apply_table_encoding (stream, comp_data, comp_size);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  if (memcmp (stream->code_table, xd3_alternate_code_table (), sizeof (xd3_dinst) * 256) == 0)
+    {
+      return 0;
+    }
+
+  stream->msg = "wrong code table reconstruction";
+  return EINVAL;
+}
+#endif
+
+/******************************************************************************************
+ 64BIT STREAMING
+ ******************************************************************************************/
+
+/* This test encodes and decodes a series of 1 megabyte windows, each containing a long
+ * run of zeros along with a single usize_t record at the start to indicate the sequence.
+ *
+ *   in_stream  only receives the error message (msg) of a failing encode/decode
+ *   encbuf     1 MB source window; its first usize_t is overwritten on each round
+ *   decbuf     1 MB buffer for the decoded window
+ *   delbuf     buffer for the encoded window; the encoder is limited to 1 << 10 bytes
+ *   megs       number of windows to process
+ *
+ * Returns 0 on success, EINVAL with in_stream->msg set when a decoded window differs
+ * from its source, or the error of a failing stream call. */
+static int
+test_streaming (xd3_stream *in_stream, uint8_t *encbuf, uint8_t *decbuf, uint8_t *delbuf, usize_t megs)
+{
+  xd3_stream estream, dstream;
+  int ret;
+  usize_t i, delsize, decsize;
+
+  /* Configure the streams one at a time.  If the encoder stream cannot be configured,
+   * dstream has not been touched at all, so it must not be passed to
+   * xd3_free_stream(): only estream is released on that path. */
+  if ((ret = xd3_config_stream (& estream, NULL)))
+    {
+      xd3_free_stream (& estream);
+      return ret;
+    }
+
+  if ((ret = xd3_config_stream (& dstream, NULL)))
+    {
+      goto fail;
+    }
+
+  for (i = 0; i < megs; i += 1)
+    {
+      /* Stamp the window with its sequence number so that windows differ. */
+      ((usize_t*) encbuf)[0] = i;
+
+      if ((i % 200) == 199) { DOT (); }
+
+      if ((ret = xd3_process_completely (& estream, xd3_encode_input, 0,
+                                         encbuf, 1 << 20,
+                                         delbuf, & delsize, 1 << 10)))
+        {
+          in_stream->msg = estream.msg;
+          goto fail;
+        }
+
+      if ((ret = xd3_process_completely (& dstream, xd3_decode_input, 0,
+                                         delbuf, delsize,
+                                         decbuf, & decsize, 1 << 20)))
+        {
+          in_stream->msg = dstream.msg;
+          goto fail;
+        }
+
+      if (decsize != 1 << 20 ||
+          memcmp (encbuf, decbuf, 1 << 20) != 0)
+        {
+          in_stream->msg = "wrong result";
+          ret = EINVAL;
+          goto fail;
+        }
+    }
+
+  if ((ret = xd3_close_stream (& estream)) ||
+      (ret = xd3_close_stream (& dstream)))
+    {
+      goto fail;
+    }
+
+ fail:
+  /* Shared exit, also reached on success: both streams were configured (or at least
+   * attempted) by the time control can get here. */
+  xd3_free_stream (& estream);
+  xd3_free_stream (& dstream);
+  return ret;
+}
+
+/* Run tests of data streaming of over and around 4GB of data.  One zero-filled
+ * allocation of TWO_MEGS_AND_DELTA bytes is split into the source window (offset 0),
+ * the decode window (offset 1 MB) and the delta buffer (offset 2 MB).
+ *
+ * When SIZEOF_XOFF_T is 4, streaming (1 << 12) + 1 windows must fail with EINVAL and
+ * the message "decoder file offset overflow"; any other outcome is reported as EINVAL
+ * with "expected overflow condition".  In all configurations, streaming exactly
+ * 1 << 12 windows must succeed.  Returns 0, ENOMEM, or the failing result.  `ignore'
+ * is not referenced. */
+static int
+test_compressed_stream_overflow (xd3_stream *stream, int ignore)
+{
+  int ret;
+  uint8_t *buf;
+  uint8_t *src_win, *dec_win, *delta;
+
+  buf = malloc (TWO_MEGS_AND_DELTA);
+
+  if (buf == NULL)
+    {
+      return ENOMEM;
+    }
+
+  memset (buf, 0, TWO_MEGS_AND_DELTA);
+
+  src_win = buf;
+  dec_win = buf + (1 << 20);
+  delta   = buf + (2 << 20);
+
+  /* Test overflow of a 32-bit file offset. */
+  if (SIZEOF_XOFF_T == 4)
+    {
+      ret = test_streaming (stream, src_win, dec_win, delta, (1 << 12) + 1);
+
+      if (ret != EINVAL || ! (MSG_IS ("decoder file offset overflow")))
+        {
+          stream->msg = "expected overflow condition";
+          free (buf);
+          return EINVAL;
+        }
+    }
+
+  /* Test transfer of exactly 32bits worth of data. */
+  ret = test_streaming (stream, src_win, dec_win, delta, 1 << 12);
+
+  free (buf);
+  return ret;
+}
+
+/******************************************************************************************
+ COMMAND LINE
+ ******************************************************************************************/
+
+/* For each encode/decode template pair below, run both commands via system() and check
+ * that the target is reconstructed.  The first pair's delta must have a size ratio within
+ * TEST_EPSILON of TEST_ADD_RATIO; every later pair must produce that same delta (-A
+ * suppresses the application header).  Returns 0 on success, non-zero on any failure. */
+static int
+test_command_line_arguments (xd3_stream *stream, int ignore)
+{
+  int i, ret;
+
+  static const char* cmdpairs[] =
+  {
+    /* standard input, output */
+    "%s -A < %s > %s", "%s -d < %s > %s",
+    "%s -A -e < %s > %s", "%s -d < %s > %s",
+    "%s -A= encode < %s > %s", "%s decode < %s > %s",
+    "%s -A -q encode < %s > %s", "%s -qdq < %s > %s",
+
+    /* file input, standard output */
+    "%s -A= %s > %s", "%s -d %s > %s",
+    "%s -A -e %s > %s", "%s -d %s > %s",
+    "%s encode -A= %s > %s", "%s decode %s > %s",
+
+    /* file input, output */
+    "%s -A= %s %s", "%s -d %s %s",
+    "%s -A -e %s %s", "%s -d %s %s",
+    "%s -A= encode %s %s", "%s decode %s %s",
+
+    /* option placement */
+    "%s -A -f %s %s", "%s -f -d %s %s",
+    "%s -e -A= %s %s", "%s -d -f %s %s",
+    "%s -f encode -A= %s %s", "%s -f decode -f %s %s",
+  };
+  /* Each command embeds program_name and two file names: use the shared command buffer size. */
+  char ecmd[TESTBUFSIZE], dcmd[TESTBUFSIZE];
+  int pairs = SIZEOF_ARRAY (cmdpairs) / 2;
+  xoff_t tsize;
+  xoff_t dsize;
+  double ratio;
+
+  srand48 (0x89162337);
+
+  for (i = 0; i < pairs; i += 1)
+    {
+      sprintf (ecmd, cmdpairs[2*i], program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
+      sprintf (dcmd, cmdpairs[2*i+1], program_name, TEST_DELTA_FILE, TEST_RECON_FILE);
+
+      test_setup ();
+      if ((ret = test_make_inputs (stream, NULL, & tsize))) { return ret; }
+      /* Encode and decode; a failing command is printed so that it can be re-run by hand. */
+      if ((ret = system (ecmd)) != 0)
+	{
+	  P(RINT "xdelta3: command was: %s\n", ecmd);
+	  stream->msg = "encode cmd failed";
+	  return EINVAL;
+	}
+
+      if ((ret = system (dcmd)) != 0)
+	{
+	  P(RINT "xdelta3: command was: %s\n", dcmd);
+	  stream->msg = "decode cmd failed";
+	  return EINVAL;
+	}
+
+      /* Compare the target file. */
+      if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE)))
+	{
+	  return ret;
+	}
+
+      if (i == 0)
+	{
+	  /* The first time through, check the compression ratio and save a copy of the
+	   * delta. */
+	  if ((ret = test_save_copy (TEST_DELTA_FILE)))
+	    {
+	      stream->msg = "copy failed";
+	      return ret;
+	    }
+
+	  if ((ret = test_file_size (TEST_DELTA_FILE, & dsize)))
+	    {
+	      return ret;
+	    }
+
+	  ratio = (double) dsize / (double) tsize;
+
+	  /* Check that it is not too small, not too large. */
+	  if (ratio >= TEST_ADD_RATIO + TEST_EPSILON)
+	    {
+	      P(RINT "xdelta3: test encode with size ratio %.3f, expected < %.3f\n",
+		       ratio, TEST_ADD_RATIO + TEST_EPSILON);
+	      stream->msg = "strange encoding";
+	      return EINVAL;
+	    }
+
+	  if (ratio <= TEST_ADD_RATIO - TEST_EPSILON)
+	    {
+	      P(RINT "xdelta3: test encode with size ratio %.3f, expected > %.3f\n",
+		       ratio, TEST_ADD_RATIO - TEST_EPSILON);
+	      stream->msg = "strange encoding";
+	      return EINVAL;
+	    }
+
+	  /* Also check that compare_files works.  The delta and original should not be
+	   * identical. */
+	  if ((ret = compare_files (stream, TEST_DELTA_FILE, TEST_TARGET_FILE)) == 0)
+	    {
+	      stream->msg = "broken compare_files";
+	      return EINVAL;
+	    }
+	}
+      else
+	{
+	  /* In subsequent passes, verify that the copy and delta are the same. */
+	  if ((ret = compare_files (stream, TEST_COPY_FILE, TEST_DELTA_FILE)))
+	    {
+	      return ret;
+	    }
+	}
+
+      test_cleanup ();
+      DOT ();
+    }
+
+  return 0;
+}
+
+/******************************************************************************************
+ EXTERNAL I/O DECOMPRESSION/RECOMPRESSION
+ ******************************************************************************************/
+
+#if EXTERNAL_COMPRESSION
+/* Run one step of test_externally_compressed_io (described below): build a shell pipe
+ * that sends the target through the external compressor, an Xdelta encode and an Xdelta
+ * decode, and check that the data arriving at the far end is unmodified. */
+static int
+test_compressed_pipe (xd3_stream *stream, main_extcomp *ext, char* buf,
+		      const char* comp_options, const char* decomp_options,
+		      int do_ext_recomp, const char* msg)
+{
+  char tail[TESTBUFSIZE];
+
+  /* Optional last stage: pass the decoder output through the external decompressor. */
+  tail[0] = 0;
+  if (do_ext_recomp)
+    {
+      sprintf (tail, " | %s %s", ext->decomp_cmdname, ext->decomp_options);
+    }
+
+  /* external compressor | xdelta3 comp_options | xdelta3 decomp_options [tail] */
+  sprintf (buf, "%s %s < %s | %s %s | %s %s%s > %s",
+	   ext->recomp_cmdname, ext->recomp_options,
+	   TEST_TARGET_FILE,
+	   program_name, comp_options,
+	   program_name, decomp_options,
+	   tail,
+	   TEST_RECON_FILE);
+
+  /* A non-zero status from the shell is reported with the caller's message. */
+  if (system (buf) != 0)
+    {
+      stream->msg = msg;
+      return EINVAL;
+    }
+
+  /* Any difference between target and reconstruction is reported as EINVAL. */
+  if (compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE) != 0)
+    {
+      return EINVAL;
+    }
+
+  DOT ();
+  return 0;
+}
+
+/* We want to test that a pipe such as:
+ *
+ *   gzip -cf | xdelta3 -cf | xdelta3 -dcf | gzip -dcf
+ *
+ * is transparent, i.e. the data comes out unmodified.  We also want to know that the
+ * middle of the pipe carries VCDIFF rather than re-compressed gzip data, so the second
+ * step suppresses output recompression in the decoder (-R) and omits the trailing gzip:
+ *
+ *   gzip -cf | xdelta3 -cf | xdelta3 -Rdcf
+ *
+ * The third step additionally disables input decompression in the encoder (-D):
+ *
+ *   gzip -cf | xdelta3 -Dcf | xdelta3 -Rdcf | gzip -dcf */
+static int
+test_externally_compressed_io (xd3_stream *stream, int ignore)
+{
+  char cmd[TESTBUFSIZE];
+  int idx, ret;
+
+  srand48 (0x91723913);
+
+  ret = test_make_inputs (stream, NULL, NULL);
+  if (ret != 0) { return ret; }
+
+  for (idx = 0; idx < SIZEOF_ARRAY (extcomp_types); idx += 1)
+    {
+      main_extcomp *ext = & extcomp_types[idx];
+
+      /* Probe for the external command first; skip this compressor if it cannot run. */
+      sprintf (cmd, "%s %s < /dev/null > /dev/null", ext->recomp_cmdname, ext->recomp_options);
+
+      if (system (cmd) != 0)
+	{
+	  P(RINT "%s=0", ext->recomp_cmdname);
+	  continue;
+	}
+
+      ret = test_compressed_pipe (stream, ext, cmd, "-cfq", "-dcfq", 1,
+				  "compression failed: identity pipe");
+      if (ret != 0) { return ret; }
+
+      ret = test_compressed_pipe (stream, ext, cmd, "-cfq", "-Rdcfq", 0,
+				  "compression failed: without recompression");
+      if (ret != 0) { return ret; }
+
+      ret = test_compressed_pipe (stream, ext, cmd, "-Dcfq", "-Rdcfq", 1,
+				  "compression failed: without decompression");
+      if (ret != 0) { return ret; }
+    }
+
+  return 0;
+}
+
+/* This tests the proper functioning of external decompression for source files.  The
+ * source and target files are identical and compressed by gzip.  Decoding such a delta
+ * with recompression disabled (-R) should produce the original, uncompressed
+ * source/target file.  Then it checks with output recompression enabled--in this case the
+ * output should be a compressed copy of the original source/target file.  Then it checks
+ * that encoding with decompression disabled works--the compressed files are identical and
+ * decoding them should always produce a compressed output, regardless of -R since the
+ * encoded delta file had decompression disabled.  Returns 0 on success or when skipped.
+ */
+static int
+test_source_decompression (xd3_stream *stream, int ignore)
+{
+  int ret;
+  char buf[TESTBUFSIZE];
+  const main_extcomp *ext;
+
+  srand48 (0x9ff56acb);
+
+  test_setup ();
+  if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; }
+
+  /* Use gzip; when main_get_compressor returns NULL the test is skipped and reports success. */
+  if ((ext = main_get_compressor ("G")) == NULL) { P(RINT "skipped"); return 0; }
+
+  /* Save an uncompressed copy. */
+  if ((ret = test_save_copy (TEST_TARGET_FILE))) { return ret; }
+
+  /* Compress the target, writing the result to the source file. */
+  sprintf (buf, "%s %s < %s > %s", ext->recomp_cmdname, ext->recomp_options, TEST_TARGET_FILE, TEST_SOURCE_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Overwrite the target with the compressed source file so that both are identical. */
+  sprintf (buf, "cp -f %s %s", TEST_SOURCE_FILE, TEST_TARGET_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Now the two identical files are compressed.  Delta-encode the target, with decompression. */
+  sprintf (buf, "%s -eq -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Decode the delta file with recompression disabled, should get an uncompressed file
+   * out. */
+  sprintf (buf, "%s -dq -R -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  if ((ret = compare_files (stream, TEST_COPY_FILE, TEST_RECON_FILE))) { return ret; }
+
+  /* Decode the delta file with recompression, should get a compressed file out.  But we
+   * can't compare compressed files directly, so decompress it externally first. */
+  sprintf (buf, "%s -dqf -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  sprintf (buf, "%s %s < %s > %s", ext->decomp_cmdname, ext->decomp_options, TEST_RECON_FILE, TEST_RECON2_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  if ((ret = compare_files (stream, TEST_COPY_FILE, TEST_RECON2_FILE))) { return ret; }
+
+  /* Encode with decompression disabled (-D). */
+  sprintf (buf, "%s -feqD -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Decode the delta file with recompression enabled, it doesn't matter, should get the
+   * compressed file out. */
+  sprintf (buf, "%s -fdq -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) { return ret; }
+
+  /* Try again with recompression disabled, it doesn't make a difference. */
+  sprintf (buf, "%s -fqRd -s%s %s %s", program_name, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  if ((ret = compare_files (stream, TEST_TARGET_FILE, TEST_RECON_FILE))) { return ret; }
+  test_cleanup();
+  return 0;
+}
+#endif
+
+/******************************************************************************************
+ FORCE, STDOUT
+ ******************************************************************************************/
+
+/* Check that encoding will not overwrite an existing output file unless -f was specified
+ * (the same code handles it for decoding).  Returns 0 when every command behaves as expected. */
+static int
+test_force_behavior (xd3_stream *stream, int ignore)
+{
+  int ret;
+  char buf[TESTBUFSIZE];  /* one shell command: program_name plus two file names */
+
+  /* Create empty target file */
+  test_setup ();
+  sprintf (buf, "cp /dev/null %s", TEST_TARGET_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Encode to delta file */
+  sprintf (buf, "%s -e %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Encode again, should fail because the delta file now exists. */
+  sprintf (buf, "%s -e %s %s 2> /dev/null", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_fail (stream, buf))) { return ret; }
+
+  /* Force it, should succeed. */
+  sprintf (buf, "%s -ef %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  test_cleanup();
+  return 0;
+}
+
+/* This checks the proper operation of the -c flag.  With -c the output goes to stdout;
+ * without it encode is given an output file name, while decode falls back to the target
+ * file name (and fails here because that file exists).  Returns 0 on success. */
+static int
+test_stdout_behavior (xd3_stream *stream, int ignore)
+{
+  int ret;
+  char buf[TESTBUFSIZE];  /* one shell command: program_name plus two file names */
+
+  test_setup();
+  sprintf (buf, "cp /dev/null %s", TEST_TARGET_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Without -c, encode writes to delta file */
+  sprintf (buf, "%s -e %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* With -c, encode writes to stdout */
+  sprintf (buf, "%s -e -c %s > %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Without -c, decode writes to target file name, but it fails because the file exists. */
+  sprintf (buf, "%s -d %s 2> /dev/null", program_name, TEST_DELTA_FILE);
+  if ((ret = do_fail (stream, buf))) { return ret; }
+
+  /* With -c, decode writes to stdout */
+  sprintf (buf, "%s -d -c %s > /dev/null", program_name, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  test_cleanup();
+
+  return 0;
+}
+
+/* Check the no-output flag (-J): commands that fail on an unwritable output succeed with it. */
+static int
+test_no_output (xd3_stream *stream, int ignore)
+{
+  char cmd[TESTBUFSIZE];
+  int ret;
+
+  test_setup ();
+  ret = test_make_inputs (stream, NULL, NULL);
+  if (ret != 0) { return ret; }
+  /* Encoding to an unwritable output file must fail, but succeed once -J is given. */
+  sprintf (cmd, "%s -e %s /dont_run_xdelta3_test_as_root 2> /dev/null", program_name, TEST_TARGET_FILE);
+  if ((ret = do_fail (stream, cmd)) != 0) { return ret; }
+  sprintf (cmd, "%s -J -e %s /dont_run_xdelta3_test_as_root", program_name, TEST_TARGET_FILE);
+  if ((ret = do_cmd (stream, cmd)) != 0) { return ret; }
+
+  /* Write a real delta so that the decoder can be put through the same two steps. */
+  sprintf (cmd, "%s -e %s %s", program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, cmd)) != 0) { return ret; }
+  /* Decoding likewise: fails on the unwritable output, succeeds with -J. */
+  sprintf (cmd, "%s -d %s /dont_run_xdelta3_test_as_root 2> /dev/null", program_name, TEST_DELTA_FILE);
+  if ((ret = do_fail (stream, cmd)) != 0) { return ret; }
+  sprintf (cmd, "%s -J -d %s /dont_run_xdelta3_test_as_root", program_name, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, cmd)) != 0) { return ret; }
+  test_cleanup ();
+  return 0;
+}
+
+/******************************************************************************************
+ Source identical optimization
+ ******************************************************************************************/
+
+/* Computing a delta should be fastest when the two inputs are identical, this checks it.
+ * The library is called to compute a delta between a 10000 byte file, 1000 byte winsize,
+ * 500 byte source blocksize.  The same buffer is used for both source and target. */
+static int
+test_identical_behavior (xd3_stream *stream, int ignore)
+{
+#define IDB_TGTSZ 10000  /* bytes in the shared source/target buffer */
+#define IDB_BLKSZ 500    /* source block size handed to the encoder */
+#define IDB_WINSZ 1000   /* encoder window size */
+#define IDB_DELSZ 1000   /* capacity of the delta output buffer */
+#define IDB_WINCNT (IDB_TGTSZ / IDB_WINSZ)  /* number of windows in the target */
+
+  int ret, i;
+  uint8_t buf[IDB_TGTSZ];
+  uint8_t del[IDB_DELSZ];
+  uint8_t rec[IDB_TGTSZ];
+  xd3_source source;
+  int    encwin = 0;
+  usize_t delpos = 0, recsize;
+  xd3_config config;
+
+  for (i = 0; i < IDB_TGTSZ; i += 1) { buf[i] = lrand48 (); } 
+
+  stream->winsize = IDB_WINSZ;
+
+  source.size     = IDB_TGTSZ;
+  source.blksize  = IDB_BLKSZ;
+  source.name     = "";
+  source.curblk   = NULL;
+  source.curblkno = -1;
+
+  if ((ret = xd3_set_source (stream, & source))) { goto fail; }
+
+  /* Compute a delta between identical source and targets. */
+  for (;;)
+    {
+      ret = xd3_encode_input (stream);
+      /* NOTE(review): input stops once IDB_WINCNT-1 windows were supplied -- confirm intended. */
+      if (ret == XD3_INPUT)
+	{
+	  if (encwin == IDB_WINCNT-1) { break; }
+	  xd3_avail_input (stream, buf + (IDB_WINSZ * encwin), IDB_WINSZ);
+	  encwin += 1;
+	  continue;
+	}
+      /* Source blocks are served straight out of the same buffer as the target. */
+      if (ret == XD3_GETSRCBLK)
+	{
+	  source.curblkno = source.getblkno;
+	  source.onblk    = IDB_BLKSZ;
+	  source.curblk   = buf + source.getblkno * IDB_BLKSZ;
+	  continue;
+	}
+
+      if (ret == XD3_WINSTART) { continue; }
+      if (ret == XD3_WINFINISH) { continue; }
+
+      if (ret != XD3_OUTPUT) { goto fail; }
+      /* Append this piece of encoder output to the delta buffer. */
+      XD3_ASSERT (delpos + stream->avail_out <= IDB_DELSZ);
+
+      memcpy (del + delpos, stream->next_out, stream->avail_out);
+
+      delpos += stream->avail_out;
+
+      xd3_consume_output (stream);
+    }
+
+  /* Reset: for decoding the whole buffer is presented as a single source block. */
+  source.blksize  = IDB_TGTSZ;
+  source.onblk    = IDB_TGTSZ;
+  source.curblk   = buf;
+  source.curblkno = 0;
+
+  if ((ret = xd3_close_stream (stream))) { goto fail; }
+  xd3_free_stream (stream);
+  xd3_init_config (& config, 0);
+  if ((ret = xd3_config_stream (stream, & config))) { goto fail; }
+  if ((ret = xd3_set_source (stream, & source))) { goto fail; }
+
+  /* Decode. */
+  if ((ret = xd3_decode_completely (stream, del, delpos, rec, & recsize, IDB_TGTSZ))) { goto fail; }
+
+  /* Check result size and data.  NOTE(review): ret is 0 on the checks below, so "goto fail" returns 0. */
+  if (recsize != IDB_TGTSZ) { stream->msg = "wrong size reconstruction"; goto fail; }
+  if (memcmp (rec, buf, IDB_TGTSZ) != 0) { stream->msg = "wrong data reconstruction"; goto fail; }
+
+  /* Check that there was one copy per window. */
+  IF_DEBUG (if (stream->n_cpy != IDB_WINCNT ||
+		stream->n_add != 0 ||
+		stream->n_run != 0) { stream->msg = "wrong copy count"; goto fail; });
+
+  /* Check that no checksums were computed because the initial match was presumed. */
+  IF_DEBUG (if (stream->large_ckcnt != 0) { stream->msg = "wrong checksum behavior"; goto fail; });
+
+  ret = 0;
+ fail:
+  return ret;
+}
+
+/******************************************************************************************
+ String matching test
+ ******************************************************************************************/
+
+/* Check particular matching behaviors by calling the soft string matcher directly with
+ * specific arguments.  Results read "R<pos>/<size>" (run) or "C<pos>/<size>@<addr>" (copy). */
+typedef struct _string_match_test string_match_test;
+
+typedef enum
+{
+  SM_NONE    = 0,
+  SM_SSMATCH = (1 << 0),  /* enables config.ssmatch */
+  SM_LAZY    = (1 << 1),  /* enables config.try_lazy */
+  SM_PROMOTE = (1 << 2),  /* enables config.promote */
+} string_match_flags;
+
+struct _string_match_test
+{
+  const char *input;   /* text handed to the matcher as its input */
+  int         flags;   /* combination of string_match_flags bits */
+  const char *result;  /* expected instructions, separated by single spaces */
+};
+
+static const string_match_test match_tests[] =
+{
+  /* nothing */
+  { "1234567890", SM_NONE, "" },
+
+  /* basic run, copy */
+  { "11111111112323232323", SM_NONE, "R0/10 C12/8@10" },
+
+  /* no run smaller than MIN_RUN=8 */
+  { "1111111",  SM_NONE, "C1/6@0" },
+  { "11111111", SM_NONE, "R0/8" },
+
+  /* simple promotion: the third copy address depends on promotion */
+  { "ABCDEF_ABCDEF^ABCDEF", SM_NONE,    "C7/6@0 C14/6@7" },
+  { "ABCDEF_ABCDEF^ABCDEF", SM_PROMOTE, "C7/6@0 C14/6@0" },
+
+  /* simple lazy: there is a better copy starting with "23 X" than "123 " */
+  { "123 23 XYZ 123 XYZ", SM_NONE, "C11/4@0" },
+  { "123 23 XYZ 123 XYZ", SM_LAZY, "C11/4@0 C12/6@4" },
+
+  /* trylazy: no lazy matches unless there are at least two characters beyond the first
+   * match */
+  { "2123_121212",   SM_LAZY, "C7/4@5" },
+  { "2123_1212123",  SM_LAZY, "C7/4@5" },
+  { "2123_1212123_", SM_LAZY, "C7/4@5 C8/5@0" },
+
+  /* trylazy: no lazy matches if the copy is >= MAXLAZY=10 */
+  { "2123_121212123_",   SM_LAZY, "C7/6@5 C10/5@0" },
+  { "2123_12121212123_", SM_LAZY, "C7/8@5 C12/5@0" },
+  { "2123_1212121212123_", SM_LAZY, "C7/10@5" },
+
+  /* lazy run: check a run overlapped by a longer copy */
+  { "11111112 111111112 1", SM_LAZY, "C1/6@0 R9/8 C10/10@0" },
+
+  /* lazy match: match_length,run_l >= min_match tests, shouldn't get any copies within
+   * the run, no run within the copy */
+  { "^________^________  ", SM_LAZY, "R1/8 C9/9@0" },
+
+  /* chain depth: it only goes back 10. this checks that the 10th match hits and the 11th
+   * misses. */
+  { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234 ", SM_NONE,
+    "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/5@0" },
+  { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234>1234 ", SM_NONE,
+    "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/4@45 C55/4@50" },
+
+  /* ssmatch test */
+  { "ABCDE___ABCDE*** BCDE***", SM_NONE, "C8/5@0 C17/4@1" },
+  { "ABCDE___ABCDE*** BCDE***", SM_SSMATCH, "C8/5@0 C17/7@9" },
+};
+
+static int
+test_string_matching (xd3_stream *stream, int ignore)
+{
+  int i, ret;
+  xd3_config config;
+  char rbuf[TESTBUFSIZE];
+  /* Run each match_tests entry and compare the formatted instructions with its result. */
+  for (i = 0; i < SIZEOF_ARRAY (match_tests); i += 1)
+    {
+      const string_match_test *test = & match_tests[i];
+      char *rptr = rbuf;
+      usize_t len = strlen (test->input);
+
+      xd3_free_stream (stream);
+      xd3_init_config (& config, 0);
+      /* Fixed soft-matcher parameters; only the three flag-controlled options vary per entry. */
+      config.smatch_cfg   = XD3_SMATCH_SOFT;
+      config.large_look   = 4;
+      config.large_step   = 4;
+      config.small_look   = 4;
+      config.small_chain  = 10;
+      config.small_lchain = 10;
+      config.max_lazy     = 10;
+      config.long_enough  = 10;
+      config.ssmatch      = (test->flags & SM_SSMATCH) && 1;
+      config.try_lazy     = (test->flags & SM_LAZY) && 1;
+      config.promote      = (test->flags & SM_PROMOTE) && 1;
+
+      if ((ret = xd3_config_stream (stream, & config))) { return ret; }
+      if ((ret = xd3_encode_init (stream))) { return ret; }
+
+      xd3_avail_input (stream, (uint8_t*)test->input, len);
+
+      if ((ret = stream->string_match (stream))) { return ret; }
+      /* Drain the used instruction list, writing "R" or "C", pos/size and, for copies, @addr. */
+      *rptr = 0;
+      while (! xd3_rlist_empty (& stream->iopt.used))
+	{
+	  xd3_rinst *inst = xd3_rlist_pop_front (& stream->iopt.used);
+
+	  switch (inst->type)
+	    {
+	    case XD3_RUN: *rptr++ = 'R'; break;
+	    case XD3_CPY: *rptr++ = 'C'; break;
+	    default: XD3_ASSERT (0);
+	    }
+
+	  sprintf (rptr, "%d/%d", inst->pos, inst->size);
+	  rptr += strlen (rptr);
+
+	  if (inst->type == XD3_CPY)
+	    {
+	      *rptr++ = '@';
+	      sprintf (rptr, "%"Q"d", inst->addr);
+	      rptr += strlen (rptr);
+	    }
+
+	  *rptr++ = ' ';
+
+	  xd3_rlist_push_back (& stream->iopt.free, inst);
+	}
+      /* Replace the trailing separator, if anything was written, with the terminator. */
+      if (rptr != rbuf)
+	{
+	  rptr -= 1; *rptr = 0;
+	}
+
+      if (strcmp (rbuf, test->result) != 0)
+	{
+	  P(RINT "test %u: expected %s: got %s", i, test->result, rbuf);
+	  stream->msg = "wrong result";
+	  return EINVAL;
+	}
+    }
+
+  return 0;
+}
+
+/******************************************************************************************
+ Source window advance, update
+ ******************************************************************************************/
+
+/*
+ * This is a test for many overlapping instructions. It must be a lazy matcher.  The
+ * target is encoded, decoded again and compared; returns 0 on success, EINVAL on mismatch.
+ */
+static int
+test_iopt_flush_instructions (xd3_stream *stream, int ignore)
+{
+  int ret, i, tpos = 0;
+  usize_t delta_size, recon_size;
+  xd3_config config;
+  uint8_t target[TESTBUFSIZE];
+  uint8_t delta[TESTBUFSIZE];
+  uint8_t recon[TESTBUFSIZE];
+
+  xd3_free_stream (stream);
+  xd3_init_config (& config, 0);
+
+  config.smatch_cfg    = XD3_SMATCH_SOFT;
+  config.large_look    = 16;
+  config.large_step    = 16;
+  config.small_look    = 4;
+  config.small_chain   = 128;
+  config.small_lchain  = 16;
+  config.ssmatch       = 0;
+  config.try_lazy      = 1;
+  config.max_lazy      = 8;
+  config.long_enough   = 128;
+  config.promote       = 0;
+
+  if ((ret = xd3_config_stream (stream, & config))) { return ret; }
+  /* Build the target: 249 five-byte groups (i, i+1, i+2, i+3, 0), then the bytes 1..252. */
+  for (i = 1; i < 250; i++)
+    {
+      target[tpos++] = i;
+      target[tpos++] = i+1;
+      target[tpos++] = i+2;
+      target[tpos++] = i+3;
+      target[tpos++] = 0;
+    }
+  for (i = 1; i < 253; i++)
+    {
+      target[tpos++] = i;
+    }
+
+  if ((ret = xd3_encode_completely (stream, target, tpos,
+				    delta, & delta_size, sizeof (delta))))
+    {
+      return ret;
+    }
+  /* Start over with a freshly configured stream for decoding. */
+  xd3_free_stream(stream);
+  if ((ret = xd3_config_stream (stream, & config))) { return ret; }
+
+  if ((ret = xd3_decode_completely (stream, delta, delta_size,
+				    recon, & recon_size, sizeof (recon))))
+    {
+      return ret;
+    }
+  /* Report a bad reconstruction as a test failure, as the other tests in this file do. */
+  if ((usize_t) tpos != recon_size) { stream->msg = "wrong size reconstruction"; return EINVAL; }
+  if (memcmp (target, recon, recon_size) != 0) { stream->msg = "wrong data reconstruction"; return EINVAL; }
+
+  return 0;
+}
+
+/******************************************************************************************
+ TEST MAIN: xd3_selftest runs each test in order and stops at the first failure.
+ ******************************************************************************************/
+
+static int
+xd3_selftest (void)
+{
+#define DO_TEST(fn,flags,arg)                                         \
+  do {                                                                \
+    xd3_stream stream;                                                \
+    xd3_config config;                                                \
+    xd3_init_config (& config, flags);                                \
+    P(RINT "xdelta3: testing " #fn "%s...",                           \
+             flags ? (" (" #flags ")") : "");                         \
+    if ((ret = xd3_config_stream (& stream, & config)) == 0 &&        \
+        (ret = test_ ## fn (& stream, arg)) == 0) {                   \
+      P(RINT " success\n");                                           \
+    } else {                                                          \
+      P(RINT " failed: %s: %s\n", xd3_errstring (& stream),           \
+               xd3_strerror (ret)); }                                 \
+    xd3_free_stream (& stream);                                       \
+    if (ret != 0) { goto failure; }                                   \
+  } while (0)
+  /* DO_TEST stores the error code of the config step or of the test itself in ret. */
+  int ret;
+
+  DO_TEST (random_numbers, 0, 0);
+  DO_TEST (decode_integer_end_of_input, 0, 0);
+  DO_TEST (decode_integer_overflow, 0, 0);
+  DO_TEST (encode_decode_uint32_t, 0, 0);
+  DO_TEST (encode_decode_uint64_t, 0, 0);
+  DO_TEST (usize_t_overflow, 0, 0);
+
+  DO_TEST (address_cache, 0, 0);
+  IF_GENCODETBL (DO_TEST (address_cache, XD3_ALT_CODE_TABLE, 0));
+
+  DO_TEST (string_matching, 0, 0);
+
+  DO_TEST (choose_instruction, 0, 0);
+  IF_GENCODETBL (DO_TEST (choose_instruction, XD3_ALT_CODE_TABLE, 0));
+  IF_GENCODETBL (DO_TEST (encode_code_table, 0, 0));
+
+  DO_TEST (identical_behavior, 0, 0);
+  DO_TEST (iopt_flush_instructions, 0, 0);
+
+  IF_DJW (DO_TEST (secondary_huff, 0, DJW_MAX_GROUPS));
+  IF_FGK (DO_TEST (secondary_fgk, 0, 1));
+
+  DO_TEST (decompress_single_bit_error, 0, 3);
+  DO_TEST (decompress_single_bit_error, XD3_ADLER32, 3);
+
+  IF_FGK (DO_TEST (decompress_single_bit_error, XD3_SEC_FGK, 3));
+  IF_DJW (DO_TEST (decompress_single_bit_error, XD3_SEC_DJW, 8));
+
+  /* There are many expected non-failures for ALT_CODE_TABLE because not all of the
+   * instruction codes are used. */
+  IF_GENCODETBL (DO_TEST (decompress_single_bit_error, XD3_ALT_CODE_TABLE, 224));
+
+  DO_TEST (compressed_stream_overflow, 0, 0);
+
+  /* The following tests have random failures on my OSX box.
+   */
+  DO_TEST (force_behavior, 0, 0);
+  DO_TEST (stdout_behavior, 0, 0);
+  DO_TEST (no_output, 0, 0);
+  DO_TEST (command_line_arguments, 0, 0);
+
+#if EXTERNAL_COMPRESSION
+  DO_TEST (source_decompression, 0, 0);
+  DO_TEST (externally_compressed_io, 0, 0);
+#endif
+
+failure:
+  test_cleanup ();
+  return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+#undef DO_TEST
+}
diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c
new file mode 100755
index 0000000..fb9a09f
--- /dev/null
+++ b/xdelta3/xdelta3.c
@@ -0,0 +1,6022 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2001, 2003, 2004, 2005, 2006.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+   -------------------------------------------------------------------
+
+			       Xdelta 3
+
+   The goal of this library is to implement both the (stand-alone)
+   data-compression and delta-compression aspects of VCDIFF encoding, and
+   to support a programming interface that works like Zlib
+   (http://www.gzip.org/zlib.html). See RFC3284: The VCDIFF Generic
+   Differencing and Compression Data Format.
+
+   VCDIFF is a unified encoding that combines data-compression and
+   delta-encoding ("differencing").
+
+   VCDIFF has a detailed byte-code instruction set with many features.
+   The instruction format supports an immediate size operand for small
+   COPYs and ADDs (e.g., under 18 bytes).  There are also instruction
+   "modes", which are used to compress COPY addresses by using two
+   address caches.  An instruction mode refers to slots in the NEAR
+   and SAME caches for recent addresses.  NEAR remembers the
+   previous 4 (by default) COPY addresses, and SAME catches
+   frequent re-uses of the same address using a 3-way (by default)
+   256-entry associative cache of [ADDR mod 256], the encoded byte.
+   A hit in the NEAR/SAME cache requires 0/1 ADDR bytes.
+
+   VCDIFF has a default instruction table, but alternate
+   instruction tables may themselves be delta-compressed and
+   included in the encoding header.  This allows even more freedom.
+   There are 9 instruction modes in the default code table, 4 near, 3
+   same, VCD_SELF (absolute encoding) and VCD_HERE (relative to the
+   current position).
+
+   ----------------------------------------------------------------------
+
+  			      Algorithms
+
+   Aside from the details of encoding and decoding, there are a bunch
+   of algorithms needed.
+
+   1. STRING-MATCH.  A two-level fingerprinting approach is used.  A
+   single loop computes the two checksums -- small and large -- at
+   successive offsets in the TARGET file.  The large checksum is more
+   accurate and is used to discover SOURCE matches, which are
+   potentially very long.  The small checksum is used to discover
+   copies within the TARGET.  Small matching, which is more expensive,
+   usually dominates the large STRING-MATCH costs in this code - the
+   more exhaustive the search, the better the results.  Either of the
+   two string-matching mechanisms may be disabled.  Currently, large
+   checksums are only performed in the source file, if present, and
+   small checksums are performed only in the left-over target input.
+   However, small matches are possible in the source file too, with a
+   range of possibilities.  [I've seen a paper on this subject, but
+   I lost it.]
+
+   2. INSTRUCTION SELECTION.  The IOPT buffer here represents a queue
+   used to store overlapping copy instructions.  There are two possible
+   optimizations that go beyond a greedy search.  Both of these fall
+   into the category of "non-greedy matching" optimizations.
+
+   The first optimization stems from backward SOURCE-COPY matching.
+   When a new SOURCE-COPY instruction covers a previous instruction in
+   the target completely, it is erased from the queue.  Randal Burns
+   originally analyzed these algorithms and did a lot of related work
+   (\cite the 1.5-pass algorithm).
+
+   The second optimization comes by the encoding of common very-small
+   COPY and ADD instructions, for which there are special DOUBLE-code
+   instructions, which code two instructions in a single byte.
+
+   The cost of bad instruction-selection overhead is relatively high
+   for data-compression, relative to delta-compression, so this second
+   optimization is fairly important.  With "lazy" matching (the name
+   used in Zlib for a similar optimization), the string-match
+   algorithm searches after a match for potential overlapping copy
+   instructions.  In Xdelta and by default, VCDIFF, the minimum match
+   size is 4 bytes, whereas Zlib searches with a 3-byte minimum.  This
+   feature, combined with double instructions, provides a nice
+   challenge.  Search in this file for "black magic", a heuristic.
+
+   3. STREAM ALIGNMENT.  Stream alignment is needed to compress large
+   inputs in constant space. TODO: redocument
+
+   4. WINDOW SELECTION.  When the IOPT buffer flushes, in the first call
+   to xd3_iopt_finish_encoding containing any kind of copy instruction,
+   the parameters of the source window must be decided: the offset into
+   the source and the length of the window.  Since the IOPT buffer is
+   finite, the program may be forced to fix these values before knowing
+   the best offset/length.  XD3_DEFAULT_SRCBACK limits the length, but a
+   smaller length is preferred because all target copies are addressed
+   after source copies in the VCDIFF address space.  Picking too large a
+   source window means larger address encoding.
+
+   If the IOPT buffer is filling easily, perhaps the target window is
+   too large.  In any case, a decision is made (though an alternative is
+   to emit the sub-window right away, to reduce the winsize
+   automatically - not implemented, another alternative is to grow the
+   IOPT buffer, it is after all bounded in size by winsize.)
+
+   The algorithm is in xd3_srcwin_setup.
+
+   5. SECONDARY COMPRESSION.  VCDIFF supports a secondary encoding to
+   be applied to the individual sections of the data format, which are
+   ADDRess, INSTruction, and DATA.  Several secondary compressor
+   variations are implemented here, although none is standardized yet.
+
+   One is an adaptive huffman algorithm -- the FGK algorithm (Faller,
+   Gallager, and Knuth, 1985).  This compressor is extremely slow.
+
+   The other is a simple static Huffman routine, which is the base
+   case of a semi-adaptive scheme published by D.J. Wheeler and first
+   widely used in bzip2 (by Julian Seward).  This is a very
+   interesting algorithm, originally published in nearly cryptic form
+   by D.J. Wheeler. !!!NOTE!!! Because these are not standardized, the
+   -S option (no secondary compression) remains on by default.
+     ftp://ftp.cl.cam.ac.uk/users/djw3/bred3.{c,ps}
+   --------------------------------------------------------------------
+
+			    Other Features
+
+   1. USER CONVENIENCE
+
+   For user convenience, it is essential to recognize Gzip-compressed
+   files and automatically Gzip-decompress them prior to
+   delta-compression (or else no delta-compression will be achieved
+   unless the user manually decompresses the inputs).  The compressed
+   representation competes with Xdelta, and this must be hidden from the
+   command-line user interface.  The Xdelta-1.x encoding was simple, not
+   compressed itself, so Xdelta-1.x uses Zlib internally to compress the
+   representation.
+
+   This implementation supports external compression, which implements
+   the necessary fork() and pipe() mechanics.  There is a tricky step
+   involved to support automatic detection of a compressed input in a
+   non-seekable input.  First you read a bit of the input to detect
+   magic headers.  When a compressed format is recognized, exec() the
+   external compression program and create a second child process to
+   copy the original input stream. [Footnote: There is a difficulty
+   related to using Gzip externally. It is not possible to decompress
+   and recompress a Gzip file transparently.  If FILE.GZ had a
+   cryptographic signature, then, after: (1) Gzip-decompression, (2)
+   Xdelta-encoding, (3) Gzip-compression the signature could be
+   broken.  The only way to solve this problem is to guess at Gzip's
+   compression level or control it by other means.  I recommend that
+   specific implementations of any compression scheme store
+   information needed to exactly re-compress the input, that way
+   external compression is transparent - however, this won't happen
+   here until it has stabilized.]
+
+   2. APPLICATION-HEADER
+
+   This feature was introduced in RFC3284.  It allows any application
+   to include a header within the VCDIFF file format.  This allows
+   general inter-application data exchange with support for
+   application-specific extensions to communicate metadata.
+
+   3. VCDIFF CHECKSUM
+
+   An optional checksum value is included with each window, which can
+   be used to validate the final result.  This verifies the correct source
+   file was used for decompression as well as the obvious advantage:
+   checking the implementation (and underlying) correctness.
+
+   4. LIGHT WEIGHT
+
+   The code makes efforts to avoid copying data more than necessary.
+   The code delays many initialization tasks until the first use, it
+   optimizes for identical (perfectly matching) inputs.  It does not
+   compute any checksums until the first lookup misses.  Memory usage
+   is reduced.  String-matching is templatized (by slightly gross use
+   of CPP) to hard-code alternative compile-time defaults.  The code
+   has few outside dependencies.
+   ----------------------------------------------------------------------
+
+		The default rfc3284 instruction table:
+		    (see RFC for the explanation)
+
+           TYPE      SIZE     MODE    TYPE     SIZE     MODE     INDEX
+   --------------------------------------------------------------------
+       1.  Run         0        0     Noop       0        0        0
+       2.  Add    0, [1,17]     0     Noop       0        0      [1,18]
+       3.  Copy   0, [4,18]     0     Noop       0        0     [19,34]
+       4.  Copy   0, [4,18]     1     Noop       0        0     [35,50]
+       5.  Copy   0, [4,18]     2     Noop       0        0     [51,66]
+       6.  Copy   0, [4,18]     3     Noop       0        0     [67,82]
+       7.  Copy   0, [4,18]     4     Noop       0        0     [83,98]
+       8.  Copy   0, [4,18]     5     Noop       0        0     [99,114]
+       9.  Copy   0, [4,18]     6     Noop       0        0    [115,130]
+      10.  Copy   0, [4,18]     7     Noop       0        0    [131,146]
+      11.  Copy   0, [4,18]     8     Noop       0        0    [147,162]
+      12.  Add       [1,4]      0     Copy     [4,6]      0    [163,174]
+      13.  Add       [1,4]      0     Copy     [4,6]      1    [175,186]
+      14.  Add       [1,4]      0     Copy     [4,6]      2    [187,198]
+      15.  Add       [1,4]      0     Copy     [4,6]      3    [199,210]
+      16.  Add       [1,4]      0     Copy     [4,6]      4    [211,222]
+      17.  Add       [1,4]      0     Copy     [4,6]      5    [223,234]
+      18.  Add       [1,4]      0     Copy       4        6    [235,238]
+      19.  Add       [1,4]      0     Copy       4        7    [239,242]
+      20.  Add       [1,4]      0     Copy       4        8    [243,246]
+      21.  Copy        4      [0,8]   Add        1        0    [247,255]
+   --------------------------------------------------------------------
+
+		     Reading the source: Overview
+
+   This file includes itself in several passes to macro-expand certain
+   sections with variable forms.  Just read ahead, there's only a
+   little confusion.  I know this sounds ugly, but hard-coding some of
+   the string-matching parameters results in a 10-15% increase in
+   string-match performance.  The only time this hurts is when you have
+   unbalanced #if/endifs.
+
+   A single compilation unit tames the Makefile.  In short, this is to
+   allow the above-described hack without an exploding Makefile.  The
+   single compilation unit includes the core library features,
+   configurable string-match templates, optional main() command-line
+   tool, misc optional features, and a regression test.  Features are
+   controlled with CPP #defines, see Makefile.am.
+
+   The initial __XDELTA3_C_HEADER_PASS__ starts first, the INLINE and
+   TEMPLATE sections follow.  Easy stuff first, hard stuff last.
+
+   Optional features include:
+
+     xdelta3-main.h     The command-line interface, external compression
+                        support, POSIX-specific, info & VCDIFF-debug tools.
+     xdelta3-second.h   The common secondary compression routines.
+     xdelta3-djw.h      The semi-adaptive huffman secondary encoder.
+     xdelta3-fgk.h      The adaptive huffman secondary encoder.
+     xdelta3-test.h     The unit test covers major algorithms,
+                        encoding and decoding.  There are single-bit
+                        error decoding tests.  There are 32/64-bit file size
+                        boundary tests.  There are command-line tests.
+                        There are compression tests.  There are external
+                        compression tests.  There are string-matching tests.
+			There should be more tests...
+
+   Additional headers include:
+
+     xdelta3.h          The public header file.
+     xdelta3-cfgs.h     The default settings for default, built-in
+                        encoders.  These are hard-coded at
+                        compile-time.  There is also a single
+                        soft-coded string matcher for experimenting
+                        with arbitrary values.
+     xdelta3-list.h     A cyclic list template
+
+   Misc little debug utilities:
+
+     badcopy.c          Randomly modifies an input file based on two
+                        parameters: (1) the probability that a byte in
+                        the file is replaced with a pseudo-random value,
+                        and (2) the mean change size.  Changes are
+                        generated using an exponential distribution
+                        which approximates the expected error_prob
+			distribution.
+     show.c             Prints an offset/length segment from a file.
+     testh.c            Checks that xdelta3.h can be #included
+   --------------------------------------------------------------------
+
+   This file itself is unusually large.  I hope to defend this layout
+   with lots of comments.  Everything in this file is related to
+   encoding and decoding.  I like it all together - the template stuff
+   is just a hack. */
+
+#ifndef __XDELTA3_C_HEADER_PASS__
+#define __XDELTA3_C_HEADER_PASS__
+
+#include <errno.h>
+#include <string.h>
+
+#include "xdelta3.h"
+
+/******************************************************************************************
+ STATIC CONFIGURATION
+ ******************************************************************************************/
+
+#ifndef XD3_MAIN                  /* the main application: pulls in xdelta3-main.h when set */
+#define XD3_MAIN 0
+#endif
+
+#ifndef VCDIFF_TOOLS              /* follows XD3_MAIN unless the build overrides it */
+#define VCDIFF_TOOLS XD3_MAIN
+#endif
+
+#ifndef SECONDARY_FGK             /* one from the algorithm preservation department: */
+#define SECONDARY_FGK 0           /* adaptive Huffman routines (xdelta3-fgk.h), off by default */
+#endif
+
+#ifndef SECONDARY_DJW             /* semi-adaptive/static Huffman for the eventual */
+#define SECONDARY_DJW 0           /* standardization, off by default until such time. */
+#endif
+
+#ifndef GENERIC_ENCODE_TABLES    /* These three are the RFC-spec'd app-specific */
+#define GENERIC_ENCODE_TABLES 0  /* code features.  This is tested but not recommended */
+#endif  			 /* unless there's a real application. */
+#ifndef GENERIC_ENCODE_TABLES_COMPUTE      /* all three default to 0 (disabled) */
+#define GENERIC_ENCODE_TABLES_COMPUTE 0
+#endif
+#ifndef GENERIC_ENCODE_TABLES_COMPUTE_PRINT
+#define GENERIC_ENCODE_TABLES_COMPUTE_PRINT 0
+#endif
+
+#if XD3_USE_LARGEFILE64          /* How does everyone else do this? */
+#define Q "q"                    /* NOTE(review): presumably a printf length modifier for xoff_t -- confirm at the use sites */
+#else
+#define Q                        /* expands to nothing without XD3_USE_LARGEFILE64 */
+#endif
+
+#if XD3_ENCODER                  /* IF_ENCODER(x) keeps X only when the encoder is compiled in */
+#define IF_ENCODER(x) x
+#else
+#define IF_ENCODER(x)
+#endif
+
+/******************************************************************************************/
+
+typedef enum {
+  /* Bit masks for the three VCDIFF indicator bytes; each group below restarts at bit 0. */
+  /* header indicator bits */
+  VCD_SECONDARY  = (1 << 0),  /* uses secondary compressor */
+  VCD_CODETABLE  = (1 << 1),  /* supplies code table data */
+  VCD_APPHEADER  = (1 << 2),  /* supplies application data */
+  VCD_INVHDR     = ~7U,       /* every bit other than the three header bits above */
+
+  /* window indicator bits */
+  VCD_SOURCE     = (1 << 0),  /* copy window in source file */
+  VCD_TARGET     = (1 << 1),  /* copy window in target file */
+  VCD_ADLER32    = (1 << 2),  /* has adler32 checksum */
+  VCD_INVWIN     = ~7U,       /* every bit other than the three window bits above */
+
+  VCD_SRCORTGT   = VCD_SOURCE | VCD_TARGET,  /* either kind of copy window */
+
+  /* delta indicator bits */
+  VCD_DATACOMP   = (1 << 0),  /* NOTE(review): presumably "data section is secondary-compressed" -- confirm */
+  VCD_INSTCOMP   = (1 << 1),  /* likewise for the instruction section */
+  VCD_ADDRCOMP   = (1 << 2),  /* likewise for the address section */
+  VCD_INVDEL     = ~0x7U,     /* every bit other than the three delta bits above */
+  /* NOTE(review): with 32-bit int the ~7U values do not fit in int, which strict ISO C requires of enumerators. */
+} xd3_indicator;
+
+typedef enum {
+  VCD_DJW_ID    = 1,  /* ID stored in djw_sec_type ("Static Huffman") */
+  VCD_FGK_ID    = 16, /* ID stored in fgk_sec_type.  !!!Note: these are not a standard IANA-allocated ID!!! */
+} xd3_secondary_ids;
+
+typedef enum {
+  SEC_NOFLAGS     = 0,        /* no flags set */
+  SEC_COUNT_FREQS = (1 << 0), /* OPT: Not implemented: Could eliminate first pass of Huffman... */
+} xd3_secondary_flags;
+
+typedef enum {
+  DATA_SECTION, /* These indicate which section to the secondary compressor. */
+  INST_SECTION, /* The header section is not compressed, therefore not listed here. */
+  ADDR_SECTION, /* Values are implicit: DATA = 0, INST = 1, ADDR = 2. */
+} xd3_section_type;
+
+typedef enum
+{
+  XD3_NOOP = 0, /* no instruction, e.g. the unused second slot of a single-instruction code */
+  XD3_ADD  = 1, /* ADD instruction */
+  XD3_RUN  = 2, /* RUN instruction */
+  XD3_CPY  = 3, /* XD3_CPY rtypes are represented as (XD3_CPY + copy-mode value) */
+} xd3_rtype;
+
+/******************************************************************************************/
+
+#include "xdelta3-list.h"
+
+XD3_MAKELIST(xd3_rlist, xd3_rinst, link);  /* NOTE(review): presumably generates the xd3_rlist list helpers for xd3_rinst through its 'link' member -- confirm in xdelta3-list.h */
+
+/******************************************************************************************/
+
+#ifndef unlikely              /* Branch hints: __builtin_expect on GCC 3 and later, plain (x) elsewhere. */
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define unlikely(x) __builtin_expect((x),0)
+#define likely(x)   __builtin_expect((x),1)
+#else
+#define unlikely(x) (x)
+#define likely(x)   (x)
+#endif
+#endif                        /* NOTE(review): likely() is defined here only if unlikely was not already defined. */
+
+#define SECONDARY_MIN_SAVINGS 2  /* Secondary compression has to save at least this many bytes. */
+#define SECONDARY_MIN_INPUT   10 /* Secondary compression needs at least this many bytes. */
+
+#define VCDIFF_MAGIC1  0xd6  /* 1st file byte: ASCII 'V' | 0x80 */
+#define VCDIFF_MAGIC2  0xc3  /* 2nd file byte: ASCII 'C' | 0x80 */
+#define VCDIFF_MAGIC3  0xc4  /* 3rd file byte: ASCII 'D' | 0x80 */
+#define VCDIFF_VERSION 0x00  /* 4th file byte */
+
+#define VCD_SELF       0     /* 1st address mode: absolute encoding */
+#define VCD_HERE       1     /* 2nd address mode: relative to the current position */
+
+#define CODE_TABLE_STRING_SIZE (6 * 256) /* Should fit a code table string. */
+#define CODE_TABLE_VCDIFF_SIZE (6 * 256) /* Should fit a compressed code table string */
+
+#define SECONDARY_ANY (SECONDARY_DJW || SECONDARY_FGK) /* True if any secondary compressor is used. */
+
+#define ALPHABET_SIZE      256  /* Used in test code--size of the secondary compressor alphabet. */
+
+#define HASH_PRIME         0    /* Old hashing experiments */
+#define HASH_PERMUTE       1
+#define ARITH_SMALL_CKSUM  1    /* Nonzero selects the multiply-based SMALL_CKSUM_UPDATE. */
+
+#define HASH_CKOFFSET      1U   /* Table entries distinguish "no-entry" from offset 0 using this offset. */
+
+#define MIN_SMALL_LOOK    2U    /* Match-optimization stuff. */
+#define MIN_LARGE_LOOK    2U
+#define MIN_MATCH_OFFSET  1U
+#define MAX_MATCH_SPLIT   18U   /* VCDIFF code table: 18 is the default limit for direct-coded ADD sizes */
+
+#define LEAST_MATCH_INCR  0   /* The least number of bytes an overlapping match must beat
+			       * the preceding match by.  This is a bias for the lazy
+			       * match optimization.  A non-zero value means that an
+			       * adjacent match has to be better by more than the step
+			       * between them.  0. */
+
+#define MIN_MATCH         4U  /* VCDIFF code table: MIN_MATCH=4 */
+#define MIN_ADD           1U  /* VCDIFF code table: smallest immediate ADD size is 1 */
+#define MIN_RUN           8U  /* The shortest run, if it is shorter than this an immediate
+			       * add/copy will be just as good.  ADD1/COPY6 = 1I+1D+1A bytes,
+			       * RUN18 = 1I+1D+1A. */
+
+#define MAX_MODES         9  /* Maximum number of modes used for compression--does not limit decompression. */
+
+#define ENC_SECTS         4  /* Number of separate output sections. */
+/* Per-section enc_tails[]/enc_heads[] slots of stream S; index 0..3 is HDR, DATA, INST, ADDR. */
+#define HDR_TAIL(s)  ((s)->enc_tails[0])
+#define DATA_TAIL(s) ((s)->enc_tails[1])
+#define INST_TAIL(s) ((s)->enc_tails[2])
+#define ADDR_TAIL(s) ((s)->enc_tails[3])
+/* Each macro expands on its own argument, so it works for any stream pointer expression. */
+#define HDR_HEAD(s)  ((s)->enc_heads[0])
+#define DATA_HEAD(s) ((s)->enc_heads[1])
+#define INST_HEAD(s) ((s)->enc_heads[2])
+#define ADDR_HEAD(s) ((s)->enc_heads[3])
+
+#define SIZEOF_ARRAY(x) (sizeof(x) / sizeof(x[0]))  /* Element count; X must be a real array, not a pointer. */
+
+#define TOTAL_MODES(x) (2+(x)->acache.s_same+(x)->acache.s_near)  /* VCD_SELF + VCD_HERE + the same and near cache modes. */
+
+/* Template instances: IF_BUILD_x(code) keeps CODE only when XD3_BUILD_x is nonzero. */
+#if XD3_BUILD_SLOW
+#define IF_BUILD_SLOW(x) x
+#else
+#define IF_BUILD_SLOW(x)
+#endif
+#if XD3_BUILD_FAST
+#define IF_BUILD_FAST(x) x
+#else
+#define IF_BUILD_FAST(x)
+#endif
+#if XD3_BUILD_SOFT
+#define IF_BUILD_SOFT(x) x
+#else
+#define IF_BUILD_SOFT(x)
+#endif
+/* Forward (tentative) declarations of one xd3_smatcher per enabled template. */
+IF_BUILD_SOFT(static const xd3_smatcher    __smatcher_soft;)
+IF_BUILD_FAST(static const xd3_smatcher    __smatcher_fast;)
+IF_BUILD_SLOW(static const xd3_smatcher    __smatcher_slow;)
+
+#if XD3_DEBUG  /* Debug only: DEBUG1 stores the small-checksum hash of INP in a local debug_hval, DEBUG2 asserts a recomputation matches it. */
+#define SMALL_HASH_DEBUG1(s,inp)                                  \
+  usize_t debug_hval = xd3_checksum_hash (& (s)->small_hash,       \
+         xd3_scksum ((inp), (s)->small_look))
+#define SMALL_HASH_DEBUG2(s,inp)                                  \
+  XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \
+	 xd3_scksum ((inp), (s)->small_look)))
+#define SMALL_HASH_STATS(x) x
+#else          /* Without XD3_DEBUG all three macros expand to nothing. */
+#define SMALL_HASH_DEBUG1(s,inp)
+#define SMALL_HASH_DEBUG2(s,inp)
+#define SMALL_HASH_STATS(x)
+#endif /* XD3_DEBUG */
+
+/* Copy the ten string-matcher config fields from SRC to DST.  Three structures contain these variables, so this is non-typed. */
+#define XD3_COPY_CONFIG_FIELDS(dst,src)       \
+  do {                                        \
+  (dst)->large_look    = (src)->large_look;   \
+  (dst)->large_step    = (src)->large_step;   \
+  (dst)->small_look    = (src)->small_look;   \
+  (dst)->small_chain   = (src)->small_chain;  \
+  (dst)->small_lchain  = (src)->small_lchain; \
+  (dst)->ssmatch       = (src)->ssmatch;      \
+  (dst)->try_lazy      = (src)->try_lazy;     \
+  (dst)->max_lazy      = (src)->max_lazy;     \
+  (dst)->long_enough   = (src)->long_enough;  \
+  (dst)->promote       = (src)->promote;      \
+  } while (0)
+
+/* Update the run-length state: extend the run when C repeats run_c, otherwise restart it at length 1.  Uses run_c/run_l from the expansion site. */
+#define NEXTRUN(c) do { if ((c) == run_c) { run_l += 1; } else { run_c = (c); run_l = 1; } } while (0)
+
+/* Update the checksum state: slide the window one byte by dropping BASE[0] and adding BASE[LOOK]; both 16-bit halves wrap mod 2^16. */
+#define LARGE_CKSUM_UPDATE(cksum,base,look)                              \
+  do {                                                                   \
+    uint32_t old_c = PERMUTE((base)[0]);                                    \
+    uint32_t new_c = PERMUTE((base)[(look)]);                               \
+    uint32_t low   = (((cksum) & 0xffff) - old_c + new_c) & 0xffff;         \
+    uint32_t high  = (((cksum) >> 16) - (old_c * (look)) + low) & 0xffff;   \
+    (cksum) = (high << 16) | low;                                        \
+  } while (0)
+
+/* Multiply and add hash function.  NOTE(review): reads an unsigned long at BASE+1 (width and alignment are platform-dependent) and ignores LOOK. */
+#if ARITH_SMALL_CKSUM
+#define SMALL_CKSUM_UPDATE(cksum,base,look) (cksum) = ((*(unsigned long*)(base+1)) * 71143)
+#else
+#define SMALL_CKSUM_UPDATE LARGE_CKSUM_UPDATE
+#endif
+
+/* Consume N bytes of input, only used by the decoder.  Operates on the variable named 'stream' at the expansion site. */
+#define DECODE_INPUT(n)             \
+  do {                              \
+  stream->total_in += (xoff_t) (n); \
+  stream->avail_in -= (n);          \
+  stream->next_in  += (n);          \
+  } while (0)
+
+/* This CPP-conditional stuff can be cleaned up... */
+#if XD3_DEBUG
+#define IF_DEBUG(x) x
+#define DEBUG_ARG(x) , x     /* appends X as an extra comma-prefixed argument */
+#else
+#define IF_DEBUG(x)
+#define DEBUG_ARG(x)
+#endif
+#if XD3_DEBUG > 1            /* IF_DEBUG1(x) keeps X only at debug level 2 and above */
+#define IF_DEBUG1(x) x
+#else
+#define IF_DEBUG1(x)
+#endif
+#if REGRESSION_TEST          /* IF_REGRESSION(x) keeps X only in regression-test builds */
+#define IF_REGRESSION(x) x
+#else
+#define IF_REGRESSION(x)
+#endif
+
+/******************************************************************************************/
+
+#if XD3_ENCODER  /* Forward declarations of static helpers that exist only in encoder builds. */
+static void*       xd3_alloc0 (xd3_stream *stream,
+			       usize_t      elts,
+			       usize_t      size);
+
+
+static xd3_output* xd3_alloc_output (xd3_stream *stream,
+				     xd3_output *old_output);
+
+
+
+static void        xd3_free_output (xd3_stream *stream,
+				    xd3_output *output);
+
+static int         xd3_emit_byte (xd3_stream  *stream,
+				  xd3_output **outputp,
+				  uint8_t      code);
+
+static int         xd3_emit_bytes (xd3_stream     *stream,
+				   xd3_output    **outputp,
+				   const uint8_t  *base,
+				   usize_t          size);
+/* NOTE(review): 'uint' below is not an ISO C type -- confirm it is typedef'd on every target. */
+static int         xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code);
+static int         xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code);
+
+static usize_t      xd3_sizeof_output (xd3_output *output);
+
+static int         xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos);
+static int         xd3_source_extend_match (xd3_stream *stream);
+static int         xd3_srcwin_setup (xd3_stream *stream);
+static int         xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point);
+static usize_t     xd3_iopt_last_matched (xd3_stream *stream);
+static int         xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num);
+
+#endif /* XD3_ENCODER */
+
+static int         xd3_decode_allocate (xd3_stream  *stream, usize_t       size,
+					uint8_t    **copied1, usize_t      *alloc1,
+					uint8_t    **copied2, usize_t      *alloc2);
+/* The declarations in this group sit outside the XD3_ENCODER guard, so they exist in every build. */
+static void        xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str);
+static void*       xd3_alloc (xd3_stream *stream, usize_t      elts, usize_t      size);
+static void        xd3_free  (xd3_stream *stream, void       *ptr);
+
+static int         xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp,
+				      const uint8_t *max, uint32_t *valp);
+
+#if REGRESSION_TEST  /* declared only in regression-test builds */
+static int         xd3_selftest      (void);
+#endif
+
+/******************************************************************************************/
+
+#define UINT32_OFLOW_MASK 0xfe000000U            /* the top 7 bits of a 32-bit value */
+#define UINT64_OFLOW_MASK 0xfe00000000000000ULL  /* the top 7 bits of a 64-bit value */
+
+#define UINT32_MAX 4294967295U  /* NOTE(review): same names as the stdint.h limits and not guarded -- check for redefinition clashes */
+#define UINT64_MAX 18446744073709551615ULL
+
+#if SIZEOF_USIZE_T == 4          /* Bind the usize_t integer coders to the matching fixed-width versions. */
+#define USIZE_T_MAX        UINT32_MAX
+#define xd3_decode_size   xd3_decode_uint32_t
+#define xd3_emit_size     xd3_emit_uint32_t
+#define xd3_sizeof_size   xd3_sizeof_uint32_t
+#define xd3_read_size     xd3_read_uint32_t
+#elif SIZEOF_USIZE_T == 8
+#define USIZE_T_MAX        UINT64_MAX
+#define xd3_decode_size   xd3_decode_uint64_t
+#define xd3_emit_size     xd3_emit_uint64_t
+#define xd3_sizeof_size   xd3_sizeof_uint64_t
+#define xd3_read_size     xd3_read_uint64_t
+#endif                           /* NOTE(review): any other SIZEOF_USIZE_T leaves these undefined without an #error. */
+
+#if SIZEOF_XOFF_T == 4           /* Same binding for xoff_t; only the decode variant is currently enabled. */
+#define XOFF_T_MAX        UINT32_MAX
+#define xd3_decode_offset xd3_decode_uint32_t
+//#define xd3_emit_offset   xd3_emit_uint32_t
+//#define xd3_sizeof_offset xd3_sizeof_uint32_t
+//#define xd3_read_offset   xd3_read_uint32_t
+#elif SIZEOF_XOFF_T == 8
+#define XOFF_T_MAX        UINT64_MAX
+#define xd3_decode_offset xd3_decode_uint64_t
+//#define xd3_emit_offset   xd3_emit_uint64_t
+//#define xd3_sizeof_offset xd3_sizeof_uint64_t
+//#define xd3_read_offset   xd3_read_uint64_t
+#endif
+/* Each of the two macros below is true when A + B would exceed the maximum of its type. */
+#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b))
+#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b))
+
+/* Name the xdelta3-specific status codes; every other value is handed to
+ * strerror() and so is treated as an errno. */
+const char* xd3_strerror (int ret)
+{
+  if (ret == XD3_INPUT)     { return "XD3_INPUT"; }
+  if (ret == XD3_OUTPUT)    { return "XD3_OUTPUT"; }
+  if (ret == XD3_GETSRCBLK) { return "XD3_GETSRCBLK"; }
+  if (ret == XD3_GOTHEADER) { return "XD3_GOTHEADER"; }
+  if (ret == XD3_WINSTART)  { return "XD3_WINSTART"; }
+  if (ret == XD3_WINFINISH) { return "XD3_WINFINISH"; }
+
+  return strerror (ret);
+}
+
+/******************************************************************************************/
+
+#if SECONDARY_ANY == 0  /* IF_SEC(x) keeps X only with a secondary compressor built in; IF_NSEC(x) only without one. */
+#define IF_SEC(x)
+#define IF_NSEC(x) x
+#else /* yuck */
+#define IF_SEC(x) x
+#define IF_NSEC(x)
+#include "xdelta3-second.h"
+#endif /* SECONDARY_ANY */
+
+#if SECONDARY_FGK
+#include "xdelta3-fgk.h"
+/* Descriptor for the FGK compressor; the casts erase the concrete argument types of its routines. */
+static const xd3_sec_type fgk_sec_type =
+{
+  VCD_FGK_ID,                               /* secondary compressor ID */
+  "FGK Adaptive Huffman",                   /* printable name */
+  SEC_NOFLAGS,                              /* no xd3_secondary_flags set */
+  (xd3_sec_stream* (*)())  fgk_alloc,
+  (void (*)())             fgk_destroy,
+  (void (*)())             fgk_init,
+  (int (*)())              xd3_decode_fgk,
+  IF_ENCODER((int (*)())   xd3_encode_fgk)  /* encode entry is present only in encoder builds */
+};
+/* FGK_CASE(s) is a statement sequence that ends in break or return, so it cannot be used as an expression. */
+#define IF_FGK(x) x
+#define FGK_CASE(s) \
+  s->sec_type = & fgk_sec_type; \
+  break;
+#else  /* FGK not compiled in: FGK_CASE sets s->msg and returns EINVAL. */
+#define IF_FGK(x)
+#define FGK_CASE(s) \
+  s->msg = "unavailable secondary compressor: FGK Adaptive Huffman"; \
+  return EINVAL;
+#endif
+
+#if SECONDARY_DJW
+#include "xdelta3-djw.h"
+/* Descriptor for the DJW compressor; the casts erase the concrete argument types of its routines. */
+static const xd3_sec_type djw_sec_type =
+{
+  VCD_DJW_ID,                                /* secondary compressor ID */
+  "Static Huffman",                          /* printable name */
+  SEC_COUNT_FREQS,                           /* the only xd3_secondary_flags bit, marked "Not implemented" at its definition */
+  (xd3_sec_stream* (*)())  djw_alloc,
+  (void (*)())             djw_destroy,
+  (void (*)())             djw_init,
+  (int (*)())              xd3_decode_huff,
+  IF_ENCODER((int (*)())   xd3_encode_huff)  /* encode entry is present only in encoder builds */
+};
+/* DJW_CASE(s) is a statement sequence that ends in break or return, so it cannot be used as an expression. */
+#define IF_DJW(x) x
+#define DJW_CASE(s) \
+  s->sec_type = & djw_sec_type; \
+  break;
+#else  /* DJW not compiled in: DJW_CASE sets s->msg and returns EINVAL. */
+#define IF_DJW(x)
+#define DJW_CASE(s) \
+  s->msg = "unavailable secondary compressor: DJW Static Huffman"; \
+  return EINVAL;
+#endif
+
+/******************************************************************************************/
+
+/* Abbreviate frequently referenced fields.  These expand against a variable named 'stream' and are #undef'd again after the passes below. */
+#define max_in    stream->avail_in
+#define pos_in    stream->input_position
+#define min_match stream->min_match
+
+/* Process the inline pass: this file includes itself with only the inline-pass guard defined. */
+#define __XDELTA3_C_INLINE_PASS__
+#include "xdelta3.c"
+#undef __XDELTA3_C_INLINE_PASS__
+
+/* Process template passes - this includes xdelta3.c several times. */
+#define __XDELTA3_C_TEMPLATE_PASS__
+#include "xdelta3-cfgs.h"
+#undef __XDELTA3_C_TEMPLATE_PASS__
+/* The field abbreviations end here. */
+#undef max_in
+#undef pos_in
+#undef min_match
+
+#if XD3_MAIN || PYTHON_MODULE
+#include "xdelta3-main.h"
+#endif
+
+#if REGRESSION_TEST
+#include "xdelta3-test.h"
+#endif
+
+#if PYTHON_MODULE
+#include "xdelta3-python.h"
+#endif
+
+#endif /* __XDELTA3_C_HEADER_PASS__ */
+#ifdef __XDELTA3_C_INLINE_PASS__
+
+/******************************************************************************************
+ Instruction tables
+ ******************************************************************************************/
+
+/* The following code implements a parametrized description of the
+ * code table given above for a few reasons.  It is not necessary for
+ * implementing the standard, to support compression with variable
+ * tables, so an implementation is only required to know the default
+ * code table to begin decompression.  (If the encoder uses an
+ * alternate table, the table is included in compressed form inside
+ * the VCDIFF file.)
+ *
+ * Before adding variable-table support there were two functions which
+ * were hard-coded to the default table above.
+ * xd3_compute_default_table() would create the default table by
+ * filling a 256-elt array of xd3_dinst values.  The corresponding
+ * function, xd3_choose_instruction(), would choose an instruction
+ * based on the hard-coded parameters of the default code table.
+ *
+ * Notes: The parametrized code table description here only generates
+ * tables of a certain regularity similar to the default table by
+ * allowing to vary the distribution of single- and
+ * double-instructions and change the number of near and same copy
+ * modes.  More exotic tables are only possible by extending this
+ * code, but a detailed experiment would need to be carried out first,
+ * probably using separate code.  I would like to experiment with a
+ * double-copy instruction, for example.
+ *
+ * For performance reasons, both the parametrized and non-parametrized
+ * versions of xd3_choose_instruction remain.  The parametrized
+ * version is only needed for testing multi-table decoding support.
+ * If ever multi-table encoding is required, this can be optimized by
+ * compiling static functions for each table.
+ */
+
+/* XD3_CHOOSE_INSTRUCTION calls xd3_choose_instruction, passing the stream's
+ * code table description only when GENERIC_ENCODE_TABLES is enabled.  The
+ * IF_GENCODETBL macro expands to its argument only in that configuration. */
+#if GENERIC_ENCODE_TABLES
+#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction ((stream)->code_table_desc, (prev), (inst))
+#define IF_GENCODETBL(x) x
+#else
+#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) xd3_choose_instruction ((prev), (inst))
+#define IF_GENCODETBL(x)
+#endif
+
+/* This structure maintains information needed by
+ * xd3_choose_instruction to compute the code for a double instruction
+ * by first indexing an array of code_table_sizes by copy mode, then
+ * using (offset + (multiplier * X)) */
+struct _xd3_code_table_sizes {
+  uint8_t cpy_max;   /* largest copy size for which the double instruction is chosen */
+  uint8_t offset;    /* constant term of the code computation */
+  uint8_t mult;      /* multiplier for (first instruction's size - its minimum size) */
+};
+
+/* This contains a complete description of a code table; xd3_build_code_table reads it. */
+struct _xd3_code_table_desc
+{
+  /* Assumes a single RUN instruction */
+  /* Assumes that MIN_MATCH is 4 */
+
+  uint8_t add_sizes;            /* Number of immediate-size single adds (default 17) */
+  uint8_t near_modes;           /* Number of near copy modes (default 4) */
+  uint8_t same_modes;           /* Number of same copy modes (default 3) */
+  uint8_t cpy_sizes;            /* Number of immediate-size single copies (default 15) */
+
+  uint8_t addcopy_add_max;      /* Maximum add size for an add-copy double instruction, all modes (default 4) */
+  uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy double instruction, up through VCD_NEAR modes (default 6) */
+  uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy double instruction, VCD_SAME modes (default 4) */
+
+  uint8_t copyadd_add_max;      /* Maximum add size for a copy-add double instruction, all modes (default 1) */
+  uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add double instruction, up through VCD_NEAR modes (default 4) */
+  uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add double instruction, VCD_SAME modes (default 4) */
+
+  xd3_code_table_sizes addcopy_max_sizes[MAX_MODES]; /* add-copy code parameters, indexed by copy mode */
+  xd3_code_table_sizes copyadd_max_sizes[MAX_MODES]; /* copy-add code parameters, indexed by copy mode */
+};
+
+/* Description of the rfc3284 code table; values are in xd3_code_table_desc field order. */
+static const xd3_code_table_desc __rfc3284_code_table_desc = {
+  17, /* add sizes */
+  4,  /* near modes */
+  3,  /* same modes */
+  15, /* copy sizes */
+
+  4,  /* add-copy max add */
+  6,  /* add-copy max cpy, near */
+  4,  /* add-copy max cpy, same */
+
+  1,  /* copy-add max add */
+  4,  /* copy-add max cpy, near */
+  4,  /* copy-add max cpy, same */
+
+  /* addcopy: one {cpy_max, offset, mult} triple per copy mode */
+  { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{4,235,1},{4,239,1},{4,243,1} },
+  /* copyadd: one {cpy_max, offset, mult} triple per copy mode */
+  { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1},{4,253,1},{4,254,1},{4,255,1} },
+};
+
+#if GENERIC_ENCODE_TABLES
+/* An alternate code table for testing (5 near, 0 same):
+ *
+ *         TYPE      SIZE     MODE    TYPE     SIZE     MODE     INDEX
+ *        ---------------------------------------------------------------
+ *     1.  Run         0        0     Noop       0        0        0
+ *     2.  Add    0, [1,23]     0     Noop       0        0      [1,24]
+ *     3.  Copy   0, [4,20]     0     Noop       0        0     [25,42]
+ *     4.  Copy   0, [4,20]     1     Noop       0        0     [43,60]
+ *     5.  Copy   0, [4,20]     2     Noop       0        0     [61,78]
+ *     6.  Copy   0, [4,20]     3     Noop       0        0     [79,96]
+ *     7.  Copy   0, [4,20]     4     Noop       0        0     [97,114]
+ *     8.  Copy   0, [4,20]     5     Noop       0        0    [115,132]
+ *     9.  Copy   0, [4,20]     6     Noop       0        0    [133,150]
+ *    10.  Add       [1,4]      0     Copy     [4,6]      0    [151,162]
+ *    11.  Add       [1,4]      0     Copy     [4,6]      1    [163,174]
+ *    12.  Add       [1,4]      0     Copy     [4,6]      2    [175,186]
+ *    13.  Add       [1,4]      0     Copy     [4,6]      3    [187,198]
+ *    14.  Add       [1,4]      0     Copy     [4,6]      4    [199,210]
+ *    15.  Add       [1,4]      0     Copy     [4,6]      5    [211,222]
+ *    16.  Add       [1,4]      0     Copy     [4,6]      6    [223,234]
+ *    17.  Copy        4      [0,6]   Add      [1,3]      0    [235,255]
+ *        --------------------------------------------------------------- */
+static const xd3_code_table_desc __alternate_code_table_desc = {
+  23, /* add sizes */
+  5,  /* near modes */
+  0,  /* same modes */
+  17, /* copy sizes */
+
+  4,  /* add-copy max add */
+  6,  /* add-copy max cpy, near */
+  0,  /* add-copy max cpy, same */
+
+  3,  /* copy-add max add */
+  4,  /* copy-add max cpy, near */
+  0,  /* copy-add max cpy, same */
+
+  /* addcopy: one {cpy_max, offset, mult} triple per copy mode; the last two are all zero */
+  { {6,151,3},{6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{0,0,0},{0,0,0} },
+  /* copyadd: one {cpy_max, offset, mult} triple per copy mode; the last two are all zero */
+  { {4,235,1},{4,238,1},{4,241,1},{4,244,1},{4,247,1},{4,250,1},{4,253,1},{0,0,0},{0,0,0} },
+};
+#endif
+
+/* Fills the 256-entry table TBL from the code table description DESC. */
+static void
+xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl)
+{
+  const int near_limit  = 2 + desc->near_modes;   /* copy modes below this use the "near" maximums */
+  const int total_modes = near_limit + desc->same_modes;
+  int n = 0, m, a, c;                             /* n: next table index to fill */
+
+  tbl[n++].type1 = XD3_RUN;                       /* entry 0: RUN */
+  tbl[n++].type1 = XD3_ADD;                       /* entry 1: ADD, no size assigned here */
+
+  for (a = 1; a <= desc->add_sizes; a += 1, n += 1)
+    {
+      tbl[n].type1 = XD3_ADD;
+      tbl[n].size1 = a;
+    }
+
+  for (m = 0; m < total_modes; m += 1)
+    {
+      tbl[n++].type1 = XD3_CPY + m;               /* this mode's copy with no size assigned */
+
+      for (c = 0; c < desc->cpy_sizes; c += 1, n += 1)
+        {
+          tbl[n].type1 = XD3_CPY + m;
+          tbl[n].size1 = MIN_MATCH + c;
+        }
+    }
+
+  for (m = 0; m < total_modes; m += 1)            /* ADD followed by COPY */
+    {
+      const int cpy_max = (m < near_limit) ? desc->addcopy_near_cpy_max : desc->addcopy_same_cpy_max;
+
+      for (a = 1; a <= desc->addcopy_add_max; a += 1)
+        {
+          for (c = MIN_MATCH; c <= cpy_max; c += 1, n += 1)
+            {
+              tbl[n].type1 = XD3_ADD;
+              tbl[n].size1 = a;
+              tbl[n].type2 = XD3_CPY + m;
+              tbl[n].size2 = c;
+            }
+        }
+    }
+
+  for (m = 0; m < total_modes; m += 1)            /* COPY followed by ADD */
+    {
+      const int cpy_max = (m < near_limit) ? desc->copyadd_near_cpy_max : desc->copyadd_same_cpy_max;
+
+      for (c = MIN_MATCH; c <= cpy_max; c += 1)
+        {
+          for (a = 1; a <= desc->copyadd_add_max; a += 1, n += 1)
+            {
+              tbl[n].type1 = XD3_CPY + m;
+              tbl[n].size1 = c;
+              tbl[n].type2 = XD3_ADD;
+              tbl[n].size2 = a;
+            }
+        }
+    }
+
+  XD3_ASSERT (n == 256);
+}
+
+/* Returns the static default (rfc3284) code table, generating it on the first call. */
+static const xd3_dinst*
+xd3_rfc3284_code_table (void)
+{
+  static xd3_dinst __rfc3284_code_table[256];
+  xd3_dinst *const table = __rfc3284_code_table;
+
+  /* xd3_build_code_table stores XD3_RUN in entry 0, which doubles as the "built" marker. */
+  if (table[0].type1 == XD3_RUN) { return table; }
+
+  xd3_build_code_table (& __rfc3284_code_table_desc, table);
+  return table;
+}
+
+#if XD3_ENCODER
+#if GENERIC_ENCODE_TABLES
+/* Returns the static alternate code table, generating it on the first call. */
+static const xd3_dinst*
+xd3_alternate_code_table (void)
+{
+  static xd3_dinst __alternate_code_table[256];
+  xd3_dinst *const table = __alternate_code_table;
+
+  /* xd3_build_code_table stores XD3_RUN in entry 0, which doubles as the "built" marker. */
+  if (table[0].type1 == XD3_RUN) { return table; }
+
+  xd3_build_code_table (& __alternate_code_table_desc, table);
+  return table;
+}
+
+/* This function chooses the code for instruction INST given the preceding instruction PREV
+ * (which may be NULL).  It always sets INST->code1; when the pair can also be issued as a
+ * double instruction it additionally sets PREV->code2. */
+static void
+xd3_choose_instruction (const xd3_code_table_desc *desc, xd3_rinst *prev, xd3_rinst *inst)
+{
+  switch (inst->type)
+    {
+    case XD3_RUN:
+      /* The 0th instruction is RUN */
+      inst->code1 = 0;
+      break;
+
+    case XD3_ADD:
+
+      if (inst->size > desc->add_sizes)
+	{
+	  /* The first instruction is non-immediate ADD */
+	  inst->code1 = 1;
+	}
+      else
+	{
+	  /* The following ADD_SIZES instructions are immediate ADDs */
+	  inst->code1 = 1 + inst->size;
+
+	  /* Now check for a possible COPY-ADD double instruction */
+	  if (prev != NULL)
+	    {
+	      int prev_mode = prev->type - XD3_CPY;
+
+	      /* If previous is a copy.  Note: as long as the previous is not a RUN
+	       * instruction, it should be a copy because it cannot be an add.  This check
+	       * is more clear. */
+	      if (prev_mode >= 0 && inst->size <= desc->copyadd_add_max)
+		{
+		  const xd3_code_table_sizes *sizes = & desc->copyadd_max_sizes[prev_mode];
+
+		  /* This check and the inst->size-<= above are == in the default table. */
+		  if (prev->size <= sizes->cpy_max)
+		    {
+		      /* The second and third exprs are 0 in the default table. */
+		      prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_MATCH)) + (inst->size - MIN_ADD);
+		    }
+		}
+	    }
+	}
+      break;
+
+    default:
+      {
+	int mode = inst->type - XD3_CPY;
+
+	/* The large copy instruction is offset by the run, large add, and immediate adds,
+	 * then multiplied by the number of immediate copies plus one (the large copy)
+	 * (i.e., if there are 15 immediate copy instructions then there are 16 copy
+	 * instructions per mode). */
+	inst->code1 = 2 + desc->add_sizes + (1 + desc->cpy_sizes) * mode;
+
+	/* Now if the copy is short enough for an immediate instruction. */
+	if (inst->size < MIN_MATCH + desc->cpy_sizes)
+	  {
+	    inst->code1 += inst->size + 1 - MIN_MATCH;
+
+	    /* Now check for a possible ADD-COPY double instruction. */
+	    if ( (prev != NULL) &&
+		 (prev->type == XD3_ADD) &&
+		 (prev->size <= desc->addcopy_add_max) )
+	      {
+		const xd3_code_table_sizes *sizes = & desc->addcopy_max_sizes[mode];
+
+		if (inst->size <= sizes->cpy_max)
+		  {
+		    prev->code2 = sizes->offset + (sizes->mult * (prev->size - MIN_ADD)) + (inst->size - MIN_MATCH);
+		  }
+	      }
+	  }
+      }
+    }
+}
+#else /* GENERIC_ENCODE_TABLES */
+
+/* This version of xd3_choose_instruction is hard-coded for the default table: every
+ * literal below is a code, a code offset or a size limit of the rfc3284 table.  It
+ * always sets INST->code1 and, for a pair that forms a double instruction, PREV->code2. */
+static void
+xd3_choose_instruction (/* const xd3_code_table_desc *desc,*/ xd3_rinst *prev, xd3_rinst *inst)
+{
+  int mode;
+  int after_short_add;
+
+  /* RUN is code 0. */
+  if (inst->type == XD3_RUN)
+    {
+      inst->code1 = 0;
+      return;
+    }
+
+  if (inst->type == XD3_ADD)
+    {
+      /* Code 1 is the ADD without an immediate size; sizes up to 17 use code 1 + size. */
+      inst->code1 = 1;
+
+      if (inst->size > 17) { return; }
+
+      inst->code1 += inst->size;
+
+      /* A size-4 copy followed by a size-1 add pairs up as code 247 + copy mode. */
+      if (inst->size == 1 && prev != NULL && prev->size == 4 && prev->type >= XD3_CPY)
+        {
+          prev->code2 = 247 + (prev->type - XD3_CPY);
+        }
+
+      return;
+    }
+
+  /* Everything else is a copy; its mode is the distance of the type from XD3_CPY. */
+  XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12);
+
+  mode = inst->type - XD3_CPY;
+
+  /* Each mode owns 16 codes starting at 19; the first of them has no immediate size. */
+  inst->code1 = 19 + 16 * mode;
+
+  if (inst->size <= 18)
+    {
+      inst->code1 += inst->size - 3;
+
+      after_short_add = (prev != NULL) && (prev->type == XD3_ADD) && (prev->size <= 4);
+
+      if (after_short_add && inst->size <= 6 && mode <= 5)
+        {
+          /* Modes 0..5: 12 codes per mode from 163, ordered by add size then copy size. */
+          prev->code2 = 163 + (mode * 12) + (3 * (prev->size - 1)) + (inst->size - 4);
+
+          XD3_ASSERT (prev->code2 <= 234);
+        }
+      else if (after_short_add && inst->size == 4 && mode >= 6)
+        {
+          /* Modes 6 and up: 4 codes per mode from 235, ordered by add size. */
+          prev->code2 = 235 + ((mode - 6) * 4) + (prev->size - 1);
+
+          XD3_ASSERT (prev->code2 <= 246);
+        }
+    }
+
+  XD3_ASSERT (inst->code1 <= 162);
+}
+#endif /* GENERIC_ENCODE_TABLES */
+
+/******************************************************************************************
+ Instruction table encoder/decoder
+ ******************************************************************************************/
+
+#if GENERIC_ENCODE_TABLES
+#if GENERIC_ENCODE_TABLES_COMPUTE == 0
+
+/* In this case, we hard-code the result of xd3_compute_code_table_encoding for each
+ * alternate code table, presuming that saves time/space.  This has been 131 bytes, but
+ * secondary compression was turned off. */
+static const uint8_t __alternate_code_table_compressed[178] =
+{0xd6,0xc3,0xc4,0x00,0x00,0x01,0x8a,0x6f,0x40,0x81,0x27,0x8c,0x00,0x00,0x4a,0x4a,0x0d,0x02,0x01,0x03,
+0x01,0x03,0x00,0x01,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,
+0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x00,0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03,0x03,0x04,
+0x04,0x04,0x04,0x00,0x04,0x05,0x06,0x01,0x02,0x03,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x05,0x05,0x05,
+0x06,0x06,0x06,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x00,0x02,0x00,0x18,0x13,0x63,0x00,0x1b,0x00,0x54,
+0x00,0x15,0x23,0x6f,0x00,0x28,0x13,0x54,0x00,0x15,0x01,0x1a,0x31,0x23,0x6c,0x0d,0x23,0x48,0x00,0x15,
+0x93,0x6f,0x00,0x28,0x04,0x23,0x51,0x04,0x32,0x00,0x2b,0x00,0x12,0x00,0x12,0x00,0x12,0x00,0x12,0x00,
+0x12,0x00,0x12,0x53,0x57,0x9c,0x07,0x43,0x6f,0x00,0x34,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,0x0c,0x00,
+0x0c,0x00,0x0c,0x00,0x15,0x00,0x82,0x6f,0x00,0x15,0x12,0x0c,0x00,0x03,0x03,0x00,0x06,0x00,};
+
+static int
+xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size)
+{
+  *size = sizeof (__alternate_code_table_compressed);   /* length of the precompiled encoding */
+  *data = & __alternate_code_table_compressed[0];
+  return 0;
+}
+
+#else
+
+/* The alternate code table will be computed and stored here. */
+static uint8_t __alternate_code_table_compressed[CODE_TABLE_VCDIFF_SIZE]; /* byte 0 == 0 is tested as "not computed yet" */
+static usize_t  __alternate_code_table_compressed_size; /* receives the encoded size when the table is computed */
+
+/* Encodes CODE_TABLE as a VCDIFF delta against the rfc3284 table (both in string form) for
+ * embedding within a VCDIFF file.  This function is NOT thread safe because it is only
+ * intended that this function is used to generate statically-compiled strings. */
+int xd3_compute_code_table_encoding (xd3_stream *in_stream, const xd3_dinst *code_table,
+                                     uint8_t *comp_string, usize_t *comp_string_size)
+{
+  uint8_t dflt_string[CODE_TABLE_STRING_SIZE];
+  uint8_t code_string[CODE_TABLE_STRING_SIZE];
+  xd3_stream stream;
+  xd3_source source;
+  xd3_config config;
+  int ret;
+
+  xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string);
+  xd3_compute_code_table_string (code_table, code_string);
+
+  /* The default table string is the source: size, blksize and onblk all equal its length. */
+  memset (& source, 0, sizeof (source));
+  source.name     = "";
+  source.curblk   = dflt_string;
+  source.curblkno = 0;
+  source.size     = CODE_TABLE_STRING_SIZE;
+  source.blksize  = CODE_TABLE_STRING_SIZE;
+  source.onblk    = CODE_TABLE_STRING_SIZE;
+
+  /* Use DJW secondary compression if it is on by default.  This saves about 20 bytes. */
+  xd3_init_config (& config, XD3_FLUSH | (SECONDARY_DJW ? XD3_SEC_DJW : 0));
+
+  /* Be exhaustive. */
+  config.sprevsz       = 1<<11;
+  config.memsize       = CODE_TABLE_STRING_SIZE * 10;
+  config.srcwin_size   = CODE_TABLE_STRING_SIZE;
+  config.srcwin_maxsz  = CODE_TABLE_STRING_SIZE;
+  config.small_chain   = CODE_TABLE_STRING_SIZE;
+  config.small_lchain  = CODE_TABLE_STRING_SIZE;
+  config.max_lazy      = CODE_TABLE_STRING_SIZE;
+  config.long_enough   = CODE_TABLE_STRING_SIZE;
+  config.large_look    = 4;
+  config.small_look    = 4;
+  config.large_step    = 1;
+  config.ssmatch       = 1;
+  config.try_lazy      = 1;
+  config.promote       = 1;
+
+  ret = xd3_config_stream (& stream, & config);
+  if (ret == 0) { ret = xd3_set_source (& stream, & source); }
+
+  if (ret == 0)
+    {
+      ret = xd3_encode_completely (& stream, code_string, CODE_TABLE_STRING_SIZE,
+                                   comp_string, comp_string_size, CODE_TABLE_VCDIFF_SIZE);
+    }
+
+  /* Reached on success and on failure alike. */
+  in_stream->msg = stream.msg;
+  xd3_free_stream (& stream);
+  return ret;
+}
+
+/* Returns the delta between the alternate and rfc3284 tables through DATA and SIZE,
+ * computing it on the first call.  As soon as another alternate table is added, this
+ * code should become generic.  For now there is only one alternate table for testing. */
+static int
+xd3_compute_alternate_table_encoding (xd3_stream *stream, const uint8_t **data, usize_t *size)
+{
+  uint8_t *const buf = __alternate_code_table_compressed;
+
+  /* A zero first byte is taken to mean that the encoding has not been computed yet. */
+  if (buf[0] == 0)
+    {
+      const int ret = xd3_compute_code_table_encoding (stream, xd3_alternate_code_table (),
+                                                       buf, & __alternate_code_table_compressed_size);
+
+      if (ret != 0) { return ret; }
+
+      /* During development of a new code table, enable this variable to print the new
+       * static contents and determine its size.  At run time the table will be filled in
+       * appropriately, but at least it should have the proper size beforehand. */
+#if GENERIC_ENCODE_TABLES_COMPUTE_PRINT
+      {
+        int i = 0;
+
+        P(RINT, "\nstatic const usize_t __alternate_code_table_compressed_size = %u;\n",
+                 __alternate_code_table_compressed_size);
+
+        P(RINT, "static const uint8_t __alternate_code_table_compressed[%u] =\n{",
+                 __alternate_code_table_compressed_size);
+
+        while (i < __alternate_code_table_compressed_size)
+          {
+            P(RINT, "0x%02x,", buf[i]);
+            if ((i % 20) == 19) { P(RINT, "\n"); }
+            i += 1;
+          }
+
+        P(RINT, "};\n");
+      }
+#endif
+    }
+
+  *data = buf;
+  *size = __alternate_code_table_compressed_size;
+
+  return 0;
+}
+#endif /* GENERIC_ENCODE_TABLES_COMPUTE != 0 */
+#endif /* GENERIC_ENCODE_TABLES */
+
+#endif /* XD3_ENCODER */
+
+/* This function generates the 1536-byte string specified in sections 5.4 and 7 of
+ * rfc3284, which is used to represent a code table within a VCDIFF file. */
+void xd3_compute_code_table_string (const xd3_dinst *code_table, uint8_t *str)
+{
+  int i;
+
+  XD3_ASSERT (CODE_TABLE_STRING_SIZE == 6 * 256);
+
+  /* The string is six consecutive 256-byte arrays: type1, type2, size1, size2, mode1,
+   * mode2.  Copy types are written as plain XD3_CPY with the mode stored separately. */
+  for (i = 0; i < 256; i += 1)
+    {
+      const xd3_dinst *entry = & code_table[i];
+      const int cpy1 = (entry->type1 >= XD3_CPY);
+      const int cpy2 = (entry->type2 >= XD3_CPY);
+
+      str[0 * 256 + i] = cpy1 ? XD3_CPY : entry->type1;
+      str[1 * 256 + i] = cpy2 ? XD3_CPY : entry->type2;
+      str[2 * 256 + i] = entry->size1;
+      str[3 * 256 + i] = entry->size2;
+      str[4 * 256 + i] = cpy1 ? entry->type1 - XD3_CPY : 0;
+      str[5 * 256 + i] = cpy2 ? entry->type2 - XD3_CPY : 0;
+    }
+}
+
+/* This function translates the code table string into the internal representation,
+ * rejecting invalid fields.  The stream's near and same-modes should already be set. */
+static int
+xd3_apply_table_string (xd3_stream *stream, const uint8_t *code_string)
+{
+  const int modes = TOTAL_MODES (stream);
+  const uint8_t *in = code_string;
+  xd3_dinst *table;
+  int i;
+
+  table = stream->code_table_alloc = xd3_alloc (stream, sizeof (xd3_dinst), 256);
+
+  if (table == NULL) { return ENOMEM; }
+
+  /* The string holds six arrays of 256 bytes, visited in order; IN walks through all of them. */
+
+  /* Array 0: type of the first instruction. */
+  for (i = 0; i < 256; i += 1, in += 1)
+    {
+      if (*in > XD3_CPY)
+        {
+          stream->msg = "invalid code-table opcode";
+          return EINVAL;
+        }
+      table[i].type1 = *in;
+    }
+
+  /* Array 1: type of the second instruction. */
+  for (i = 0; i < 256; i += 1, in += 1)
+    {
+      if (*in > XD3_CPY)
+        {
+          stream->msg = "invalid code-table opcode";
+          return EINVAL;
+        }
+      table[i].type2 = *in;
+    }
+
+  /* Array 2: size of the first instruction, which must be 0 for a NOOP. */
+  for (i = 0; i < 256; i += 1, in += 1)
+    {
+      if (*in != 0 && table[i].type1 == XD3_NOOP)
+        {
+          stream->msg = "invalid code-table size";
+          return EINVAL;
+        }
+      table[i].size1 = *in;
+    }
+
+  /* Array 3: size of the second instruction, which must be 0 for a NOOP. */
+  for (i = 0; i < 256; i += 1, in += 1)
+    {
+      if (*in != 0 && table[i].type2 == XD3_NOOP)
+        {
+          stream->msg = "invalid code-table size";
+          return EINVAL;
+        }
+      table[i].size2 = *in;
+    }
+
+  /* Array 4: mode of the first instruction; nonzero only for copies, and added to the type. */
+  for (i = 0; i < 256; i += 1, in += 1)
+    {
+      if (*in >= modes || (*in != 0 && table[i].type1 != XD3_CPY))
+        {
+          stream->msg = "invalid code-table mode";
+          return EINVAL;
+        }
+      table[i].type1 += *in;
+    }
+
+  /* Array 5: mode of the second instruction; nonzero only for copies, and added to the type. */
+  for (i = 0; i < 256; i += 1, in += 1)
+    {
+      if (*in >= modes || (*in != 0 && table[i].type2 != XD3_CPY))
+        {
+          stream->msg = "invalid code-table mode";
+          return EINVAL;
+        }
+      table[i].type2 += *in;
+    }
+
+  stream->code_table = table;
+  return 0;
+}
+
+/* This function applies a code table delta (DATA, SIZE) against the rfc3284 table string and
+ * installs the decoded table on IN_STREAM; failures are reported through IN_STREAM->msg.  The
+ * default code table string can be cached if alternate code tables ever become popular. */
+static int
+xd3_apply_table_encoding (xd3_stream *in_stream, const uint8_t *data, usize_t size)
+{
+  uint8_t dflt_string[CODE_TABLE_STRING_SIZE];
+  uint8_t code_string[CODE_TABLE_STRING_SIZE];
+  usize_t code_size;
+  xd3_stream stream;
+  xd3_source source;
+  int ret;
+
+  xd3_compute_code_table_string (xd3_rfc3284_code_table (), dflt_string);
+
+  memset (& source, 0, sizeof (source));   /* fields not assigned below must not stay uninitialized */
+  source.size     = CODE_TABLE_STRING_SIZE;
+  source.blksize  = CODE_TABLE_STRING_SIZE;
+  source.onblk    = CODE_TABLE_STRING_SIZE;
+  source.name     = "rfc3284 code table";
+  source.curblk   = dflt_string;
+  source.curblkno = 0;
+
+  if ((ret = xd3_config_stream (& stream, NULL)) ||
+      (ret = xd3_set_source (& stream, & source)) ||
+      (ret = xd3_decode_completely (& stream, data, size, code_string, & code_size, sizeof (code_string))))
+    {
+      in_stream->msg = stream.msg;
+      goto fail;
+    }
+
+  if (code_size != sizeof (code_string))
+    {
+      in_stream->msg = "corrupt code-table encoding";   /* the local stream is freed below */
+      ret = EINVAL;
+      goto fail;
+    }
+
+  ret = xd3_apply_table_string (in_stream, code_string);
+
+ fail:
+  xd3_free_stream (& stream);
+  return ret;
+}
+
+/******************************************************************************************
+ Permute stuff
+ ******************************************************************************************/
+
+#if HASH_PERMUTE == 0
+#define PERMUTE(x) (x)
+#else
+#define PERMUTE(x) (__single_hash[(uint)(x)])
+
+static const uint16_t __single_hash[256] =
+{
+  /* Random numbers generated using SLIB's pseudo-random number generator.  PERMUTE maps
+   * each byte of the input alphabet to one of these 16-bit values. */
+  0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
+  0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
+  0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
+  0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
+  0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
+  0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
+  0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
+  0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
+  0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
+  0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
+  0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
+  0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
+  0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
+  0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
+  0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
+  0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
+  0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
+  0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
+  0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
+  0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
+  0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
+  0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
+  0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
+  0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
+  0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
+  0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
+  0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
+  0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
+  0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
+  0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
+  0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
+  0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
+};
+#endif
+
+/******************************************************************************************
+ Ctable stuff
+ ******************************************************************************************/
+
+#if HASH_PRIME
+static const usize_t __primes[] = /* ascending candidate sizes; xd3_size_hashtable picks cfg->size from here */
+{
+  11, 19, 37, 73, 109,
+  163, 251, 367, 557, 823,
+  1237, 1861, 2777, 4177, 6247,
+  9371, 14057, 21089, 31627, 47431,
+  71143, 106721, 160073, 240101, 360163,
+  540217, 810343, 1215497, 1823231, 2734867,
+  4102283, 6153409, 9230113, 13845163, 20767711,
+  31151543, 46727321, 70090921, 105136301, 157704401,
+  236556601, 354834919, 532252367, 798378509, 1197567719,
+  1796351503
+};
+
+static const usize_t __nprimes = SIZEOF_ARRAY (__primes); /* presumably the element count of __primes */
+#endif
+
+static INLINE uint32_t
+xd3_checksum_hash (const xd3_hash_cfg *cfg, const uint32_t cksum)
+{
+#if !HASH_PRIME
+  /* Power-of-two table: fold the shifted checksum into itself, then apply the mask. */
+  const uint32_t folded = cksum ^ (cksum >> cfg->shift);
+  return folded & cfg->mask;
+#else
+  /* Prime-sized table: reduce the checksum modulo the table size. */
+  return (cksum % cfg->size);
+#endif
+}
+
+/******************************************************************************************
+ Create the hash table.
+ ******************************************************************************************/
+
+static INLINE void
+xd3_swap_uint8p (uint8_t** p1, uint8_t** p2)
+{
+  uint8_t *const held = *p1;   /* value of *p1 before it is overwritten */
+  *p1 = *p2;
+  *p2 = held;
+}
+
+static INLINE void
+xd3_swap_usize_t (usize_t* p1, usize_t* p2)
+{
+  const usize_t held = *p1;   /* value of *p1 before it is overwritten */
+  *p1 = *p2;
+  *p2 = held;
+}
+
+/* Returns 0 if VALUE is a power of two, else EINVAL.  *LOGOF (if non-NULL) gets the shift count. */
+static int
+xd3_check_pow2 (usize_t value, usize_t *logof)
+{
+  usize_t bit;
+  usize_t shifts = 0;
+  int     result = EINVAL;
+
+  /* Walk a single set bit upward until it equals VALUE or is shifted out of the word. */
+  for (bit = 1; bit != 0; bit <<= 1)
+    {
+      if (bit == value)
+        {
+          result = 0;
+          break;
+        }
+      shifts += 1;
+    }
+
+  if (logof != NULL) { *logof = shifts; }
+  return result;
+}
+
+static usize_t
+xd3_round_blksize (usize_t sz, usize_t blksz)
+{
+  const usize_t excess = sz & (blksz - 1);   /* part of SZ past a BLKSZ multiple (BLKSZ asserted pow2) */
+
+  XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0);
+  if (excess == 0) { return sz; }
+  return sz + (blksz - excess);
+}
+
+#if XD3_ENCODER
+#if !HASH_PRIME
+static usize_t
+xd3_size_log2 (usize_t slots)
+{
+  const int max_bits = 28; /* This should not be an unreasonable limit. */
+  int i = 3;
+
+  /* Find the first i in [3, max_bits] with slots < 2^i. */
+  while (i <= max_bits && slots >= (1 << i))
+    {
+      i += 1;
+    }
+
+  /* No such i: the result is capped at max_bits.  Otherwise it is i - 1. */
+  if (i > max_bits) { return max_bits; }
+
+  return i - 1;
+}
+#endif
+
+static void
+xd3_size_hashtable (xd3_stream    *stream,
+		    usize_t         space,
+		    xd3_hash_cfg  *cfg)
+{
+  /* SPACE is divided by sizeof (usize_t) to get the number of slots available.  The number
+   * of hash buckets is then taken from the table of primes or computed as a power of two,
+   * depending on HASH_PRIME. */
+  const usize_t slots = space / sizeof (usize_t);
+
+#if HASH_PRIME
+  usize_t pick = 1;
+
+  /* Stop at the first prime larger than SLOTS, or one past the end of the table. */
+  while (pick < __nprimes && slots >= __primes[pick])
+    {
+      pick += 1;
+    }
+
+  /* The entry just below the stopping point becomes the table size. */
+  cfg->size = __primes[pick - 1];
+#else
+  const int bits = xd3_size_log2 (slots);
+
+  /* A table of 2^bits buckets, its index mask, and the shift amount read by
+   * xd3_checksum_hash. */
+  cfg->shift = min (32 - bits, 16);
+
+  cfg->size  = (1 << bits);
+  cfg->mask  = (cfg->size - 1);
+#endif
+}
+#endif
+
+/******************************************************************************************
+ Cksum function
+ ******************************************************************************************/
+
+/* OPT: It turns out that the compiler can't unroll the loop as well as you can by hand. */
+static INLINE uint32_t
+xd3_lcksum (const uint8_t *seg, const int ln)
+{
+  uint32_t sum    = 0;   /* running sum of the permuted bytes */
+  uint32_t sumsum = 0;   /* running sum of the running sums */
+  int      n;
+
+  for (n = 0; n < ln; n += 1)
+    {
+      sum    += PERMUTE(seg[n]);
+      sumsum += sum;
+    }
+
+  return ((sumsum & 0xffff) << 16) | (sum & 0xffff);   /* low 16 bits of each, sumsum on top */
+}
+
+#if ARITH_SMALL_CKSUM
+static INLINE usize_t
+xd3_scksum (const uint8_t *seg, const int ln)
+{
+  usize_t c;   /* returned checksum; presumably assigned by SMALL_CKSUM_UPDATE (macro defined elsewhere) */
+  /* The -1 is because UPDATE operates on seg[1..ln] */
+  SMALL_CKSUM_UPDATE (c,(seg-1),ln);
+  return c;
+}
+#else
+#define xd3_scksum(seg,ln) xd3_lcksum(seg,ln)
+#endif
+
+/******************************************************************************************
+ Adler32 stream function: code copied from Zlib, defined in RFC1950
+ ******************************************************************************************/
+
+#define A32_BASE 65521L /* Largest prime smaller than 2^16 */
+#define A32_NMAX 5552   /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+/* A32_DOn adds n consecutive bytes, starting at buf[i], into s1 and each new s1 into s2. */
+#define A32_DO1(buf,i)  {s1 += buf[i]; s2 += s1;} /* uses variables named s1 and s2 at the point of use */
+#define A32_DO2(buf,i)  A32_DO1(buf,i); A32_DO1(buf,i+1);
+#define A32_DO4(buf,i)  A32_DO2(buf,i); A32_DO2(buf,i+2);
+#define A32_DO8(buf,i)  A32_DO4(buf,i); A32_DO4(buf,i+4);
+#define A32_DO16(buf)   A32_DO8(buf,0); A32_DO8(buf,8); /* covers buf[0] through buf[15] */
+
+static unsigned long adler32 (unsigned long adler, const uint8_t *buf, usize_t len)
+{
+    /* ADLER carries the two 16-bit running sums.  The local names s1 and s2 are
+     * required here because the A32_DO* macros refer to them directly. */
+    unsigned long s1 = adler & 0xffff;
+    unsigned long s2 = (adler >> 16) & 0xffff;
+
+    while (len > 0)
+      {
+        /* Sum at most A32_NMAX bytes before each pair of modulo reductions. */
+        int chunk = (len < A32_NMAX) ? len : A32_NMAX;
+
+        len -= chunk;
+
+        /* Sixteen bytes per step through the unrolled macro. */
+        for (; chunk >= 16; chunk -= 16, buf += 16)
+          {
+            A32_DO16(buf);
+          }
+
+        /* Then the remaining bytes of this chunk, one at a time. */
+        for (; chunk > 0; chunk -= 1)
+          {
+            s1 += *buf++;
+            s2 += s1;
+          }
+
+        s1 %= A32_BASE;
+        s2 %= A32_BASE;
+      }
+
+    /* s2 goes in the upper 16 bits of the result, s1 in the lower. */
+    return (s2 << 16) | s1;
+}
+
+/******************************************************************************************
+ Run-length function
+ ******************************************************************************************/
+
+static INLINE int
+xd3_comprun (const uint8_t *seg, int slook, uint8_t *run_cp)
+{
+  int i;
+  int     run_l = 0; /* returned to the caller; presumably updated by NEXTRUN (macro defined elsewhere) */
+  uint8_t run_c = 0; /* stored to *run_cp; presumably updated by NEXTRUN as well */
+
+  for (i = 0; i < slook; i += 1) /* pass the first SLOOK bytes of SEG to NEXTRUN, in order */
+    {
+      NEXTRUN(seg[i]);
+    }
+
+  (*run_cp) = run_c;
+
+  return run_l;
+}
+
+/******************************************************************************************
+ Basic encoder/decoder functions
+ ******************************************************************************************/
+
+static int
+xd3_decode_byte (xd3_stream *stream, uint *val)
+{
+  if (stream->avail_in != 0)
+    {
+      /* Hand back the next input byte, then let DECODE_INPUT account for it. */
+      *val = stream->next_in[0];
+      DECODE_INPUT (1);
+      return 0;
+    }
+
+  stream->msg = "further input required";
+  return XD3_INPUT;
+}
+
+static int
+xd3_decode_bytes (xd3_stream *stream, uint8_t *buf, usize_t *pos, usize_t size)
+{
+  /* Note: *pos may already equal size on entry, which happens when a zero-length
+   * appheader or code table is transmitted; there is nothing in the standard against
+   * that, and the loop below then does nothing. */
+  while (*pos < size)
+    {
+      usize_t missing;
+      usize_t chunk;
+
+      if (stream->avail_in == 0)
+        {
+          stream->msg = "further input required";
+          return XD3_INPUT;
+        }
+
+      /* Copy as much of the missing part as the current input holds. */
+      missing = size - *pos;
+      chunk   = min (missing, stream->avail_in);
+
+      memcpy (buf + *pos, stream->next_in, chunk);
+      DECODE_INPUT (chunk);
+      (*pos) += chunk;
+    }
+
+  return 0;
+}
+
+#if XD3_ENCODER
+static int
+xd3_emit_byte (xd3_stream  *stream,
+	       xd3_output **outputp,
+	       uint8_t      code)
+{
+  xd3_output *cur = *outputp;
+
+  /* When the current buffer is full (next == avail), continue in the one returned by
+   * xd3_alloc_output and move the caller's pointer to it. */
+  if (cur->next == cur->avail)
+    {
+      xd3_output *fresh = xd3_alloc_output (stream, cur);
+
+      if (fresh == NULL) { return ENOMEM; }
+
+      *outputp = fresh;
+      cur      = fresh;
+    }
+
+  cur->base[cur->next] = code;
+  cur->next += 1;
+  return 0;
+}
+
+/* Appends SIZE bytes starting at BASE to the output chain whose tail is *OUTPUTP,
+ * splitting the copy across pages.  Whenever the tail page is full a new page is taken
+ * from xd3_alloc_output and *OUTPUTP is advanced to it.  The loop body always runs at
+ * least once, even when SIZE is 0.  Returns 0, or ENOMEM when a page cannot be
+ * obtained (bytes copied before the failure stay in the chain). */
+static int
+xd3_emit_bytes (xd3_stream     *stream,
+		xd3_output    **outputp,
+		const uint8_t  *base,
+		usize_t          size)
+{
+  xd3_output *tail = (*outputp);
+
+  do
+    {
+      usize_t chunk;
+
+      if (tail->next == tail->avail)
+	{
+	  xd3_output *fresh = xd3_alloc_output (stream, tail);
+
+	  if (fresh == NULL)
+	    {
+	      return ENOMEM;
+	    }
+
+	  (*outputp) = fresh;
+	  tail       = fresh;
+	}
+
+      /* Copy as much as fits in the current page. */
+      chunk = min (tail->avail - tail->next, size);
+
+      memcpy (tail->base + tail->next, base, chunk);
+
+      tail->next += chunk;
+      base       += chunk;
+      size       -= chunk;
+    }
+  while (size > 0);
+
+  return 0;
+}
+#endif /* XD3_ENCODER */
+
+/******************************************************************************************
+ Integer encoder/decoder functions
+ ******************************************************************************************/
+
+/* Function body for the streaming integer decoders.  Expects `stream' and `val' to be
+ * in scope.  While stream->avail_in is non-zero it consumes one byte with DECODE_INPUT
+ * and shifts its low 7 bits into PART.  A byte with the high bit clear ends the integer:
+ * PART is stored through val, reset to 0, and 0 is returned.  If PART already has any
+ * OFLOW bit set when a further byte has been consumed, EINVAL is returned.  If input
+ * runs out first, XD3_INPUT is returned and PART is left holding the partial value. */
+#define DECODE_INTEGER_TYPE(PART,OFLOW)                                \
+  while (stream->avail_in != 0)                                        \
+    {                                                                  \
+      uint next = stream->next_in[0];                                  \
+                                                                       \
+      DECODE_INPUT(1);                                                 \
+                                                                       \
+      if (PART & OFLOW)                                                \
+	{                                                              \
+	  stream->msg = "overflow in decode_integer";                  \
+	  return EINVAL;                                               \
+	}                                                              \
+                                                                       \
+      PART = (PART << 7) | (next & 127);                               \
+                                                                       \
+      if ((next & 128) == 0)                                           \
+	{                                                              \
+	  (*val) = PART;                                               \
+	  PART = 0;                                                    \
+	  return 0;                                                    \
+	}                                                              \
+    }                                                                  \
+                                                                       \
+  stream->msg = "further input required";                              \
+  return XD3_INPUT
+
+/* Function body for the in-memory integer readers.  Expects `stream', `inpp', `max'
+ * and `valp' to be in scope.  Reads bytes starting at *inpp, shifting the low 7 bits of
+ * each into a TYPE accumulator, until a byte with the high bit clear has been read.  On
+ * success the value is stored through valp, *inpp is advanced past the integer and 0 is
+ * returned.  EINVAL is returned (with *inpp and *valp untouched) when the read position
+ * reaches `max' before the integer ends, or when the accumulator already has an OFLOW
+ * bit set and another byte would have to be shifted in. */
+#define READ_INTEGER_TYPE(TYPE, OFLOW)                                 \
+  TYPE val = 0;                                                        \
+  const uint8_t *inp = (*inpp);                                        \
+  uint next;                                                           \
+                                                                       \
+  do                                                                   \
+    {                                                                  \
+      if (inp == max)                                                  \
+	{                                                              \
+	  stream->msg = "end-of-input in read_integer";                \
+	  return EINVAL;                                               \
+	}                                                              \
+                                                                       \
+      if (val & OFLOW)                                                 \
+	{                                                              \
+	  stream->msg = "overflow in read_integer";                    \
+	  return EINVAL;                                               \
+	}                                                              \
+                                                                       \
+      next = (*inp++);                                                 \
+      val  = (val << 7) | (next & 127);                                \
+    }                                                                  \
+  while (next & 128);                                                  \
+                                                                       \
+  (*valp) = val;                                                       \
+  (*inpp) = inp;                                                       \
+                                                                       \
+  return 0
+
+/* Function body for the integer emitters.  Expects `stream', `output' and `num' to be
+ * in scope.  Fills a 10-byte scratch buffer from the end with 7-bit groups of num, least
+ * significant group last, with the high bit set on every byte except the final one, and
+ * passes the used tail of the buffer to xd3_emit_bytes, returning its result.  At least
+ * one byte is always produced (num == 0 emits a single 0 byte). */
+#define EMIT_INTEGER_TYPE()                                            \
+  /* a 64-bit value at 7 bits per byte needs 64/7 = 9.1, i.e. 10 bytes */ \
+  uint8_t buf[10];                                                     \
+  usize_t  bufi = 10;                                                   \
+                                                                       \
+  XD3_ASSERT (num >= 0);                                               \
+                                                                       \
+  /* This loop performs division and turns on all MSBs. */             \
+  do                                                                   \
+    {                                                                  \
+      buf[--bufi] = (num & 127) | 128;                                 \
+      num >>= 7;                                                       \
+    }                                                                  \
+  while (num != 0);                                                    \
+                                                                       \
+  /* Turn off MSB of the last byte. */                                 \
+  buf[9] &= 127;                                                       \
+                                                                       \
+  XD3_ASSERT (bufi >= 0);                                              \
+                                                                       \
+  return xd3_emit_bytes (stream, output, buf + bufi, 10 - bufi)
+
+/* Helpers for the xd3_sizeof_* functions: return x from the enclosing function when
+ * `num' (taken from the enclosing scope) is below 2^(7*x), i.e. fits in x 7-bit groups.
+ * The 32-bit form shifts an unsigned int, the 64-bit form an unsigned long long. */
+#define IF_SIZEOF32(x) if (num < (1U   << (7 * (x)))) return (x);
+#define IF_SIZEOF64(x) if (num < (1ULL << (7 * (x)))) return (x);
+
+#if USE_UINT32
+/* Returns how many 7-bit groups are needed to represent NUM: the smallest n in 1..4
+ * with NUM < 2^(7*n), or 5 when no such n exists. */
+static uint
+xd3_sizeof_uint32_t (uint32_t num)
+{
+  uint nbytes;
+
+  for (nbytes = 1; nbytes <= 4; nbytes += 1)
+    {
+      if (num < (1U << (7 * nbytes)))
+	{
+	  return nbytes;
+	}
+    }
+
+  return 5;
+}
+
+/* Streaming decode of a 32-bit integer into *VAL.  The body is DECODE_INTEGER_TYPE with
+ * stream->dec_32part as the partial-value accumulator and UINT32_OFLOW_MASK as the
+ * overflow mask; return values are those of that macro (0, EINVAL or XD3_INPUT). */
+static int
+xd3_decode_uint32_t (xd3_stream *stream, uint32_t *val)
+{ DECODE_INTEGER_TYPE (stream->dec_32part, UINT32_OFLOW_MASK); }
+/* Reads a 32-bit integer from the buffer [*INPP, MAX) into *VALP and advances *INPP.
+ * The body is READ_INTEGER_TYPE instantiated for uint32_t with UINT32_OFLOW_MASK;
+ * returns 0 on success or EINVAL as described by that macro. */
+static int
+xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint32_t *valp)
+{ READ_INTEGER_TYPE (uint32_t, UINT32_OFLOW_MASK); }
+#if XD3_ENCODER
+/* Emits NUM to the output chain *OUTPUT using EMIT_INTEGER_TYPE; returns whatever
+ * xd3_emit_bytes returns for the encoded bytes. */
+static int
+xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num)
+{ EMIT_INTEGER_TYPE (); }
+#endif
+#endif
+
+#if USE_UINT64
+/* We only ever decode offsets, but the other three are part of the regression test
+ * anyway.
+ *
+ * Streaming decode of a 64-bit integer into *VAL: DECODE_INTEGER_TYPE with
+ * stream->dec_64part as the partial-value accumulator and UINT64_OFLOW_MASK as the
+ * overflow mask. */
+static int
+xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val)
+{ DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); }
+#if REGRESSION_TEST
+#if XD3_ENCODER
+/* Emits NUM to the output chain *OUTPUT using EMIT_INTEGER_TYPE; returns whatever
+ * xd3_emit_bytes returns.  Compiled only when both REGRESSION_TEST and XD3_ENCODER
+ * are enabled. */
+static int
+xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num)
+{ EMIT_INTEGER_TYPE (); }
+#endif
+/* Reads a 64-bit integer from the buffer [*INPP, MAX) into *VALP and advances *INPP.
+ * The body is READ_INTEGER_TYPE instantiated for uint64_t with UINT64_OFLOW_MASK;
+ * returns 0 on success or EINVAL as described by that macro. */
+static int
+xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp, const uint8_t *max, uint64_t *valp)
+{ READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); }
+
+/* Returns how many 7-bit groups are needed to represent NUM: the smallest n in 1..9
+ * with NUM < 2^(7*n), or 10 when no such n exists. */
+static uint
+xd3_sizeof_uint64_t (uint64_t num)
+{
+  uint nbytes;
+
+  for (nbytes = 1; nbytes <= 9; nbytes += 1)
+    {
+      if (num < (1ULL << (7 * nbytes)))
+	{
+	  return nbytes;
+	}
+    }
+
+  return 10;
+}
+#endif
+#endif
+
+/******************************************************************************************
+ Debug instruction statistics
+ ******************************************************************************************/
+
+#if XD3_DEBUG
+/* Debug statistics: counts one instruction byte in stream->n_ibytes.  Under IF_DEBUG1
+ * it additionally bumps the per-code histogram stream->i_freqs[CODE], allocating the
+ * zeroed table on first use and calling abort () if that allocation fails.
+ * Note: the xd3_alloc0 call passes the element size as its `elts' argument and the
+ * count (256) as `size'; only the product is used, so the table has 256 entries.
+ * NOTE(review): CODE is not range-checked here; presumably callers pass a value
+ * below 256 -- confirm. */
+static void
+xd3_count_inst (xd3_stream *stream, uint code)
+{
+  IF_DEBUG1 ({
+    if (stream->i_freqs == NULL &&
+	(stream->i_freqs = xd3_alloc0 (stream, sizeof (stream->i_freqs[0]), 256)) == NULL) { abort (); }
+
+    stream->i_freqs[code] += 1;
+  });
+  stream->n_ibytes += 1;
+}
+
+/* Debug statistics (IF_DEBUG1 only): bumps the address-mode histogram
+ * stream->i_modes[MODE].  The zeroed table, TOTAL_MODES (stream) entries long, is
+ * allocated on first use; abort () is called if that allocation fails.
+ * NOTE(review): MODE is not range-checked here; presumably it is always below
+ * TOTAL_MODES (stream) -- confirm. */
+static void
+xd3_count_mode (xd3_stream *stream, uint mode)
+{
+  IF_DEBUG1 ({
+  if (stream->i_modes == NULL &&
+      (stream->i_modes = xd3_alloc0 (stream, sizeof (stream->i_modes[0]), TOTAL_MODES (stream))) == NULL) { abort (); }
+  stream->i_modes[mode] += 1;
+  });
+}
+
+/* Debug statistics: adds xd3_sizeof_size (SIZE) to stream->n_sbytes.  Under IF_DEBUG1
+ * it additionally bumps stream->i_sizes[SIZE] for sizes below 64, allocating the zeroed
+ * 64-entry table on first use and calling abort () if that allocation fails.  Sizes of
+ * 64 or more are not recorded in the histogram. */
+static void
+xd3_count_size (xd3_stream *stream, usize_t size)
+{
+  IF_DEBUG1({
+    if (stream->i_sizes == NULL &&
+	(stream->i_sizes = xd3_alloc0 (stream, sizeof (stream->i_sizes[0]), 64)) == NULL) { abort (); }
+
+    if (size < 64) { stream->i_sizes[size] += 1; }
+  });
+  stream->n_sbytes += xd3_sizeof_size (size);
+}
+#endif
+
+/******************************************************************************************
+ Address cache stuff
+ ******************************************************************************************/
+
+/* Allocates the address-cache arrays of STREAM: near_array with s_near entries and
+ * same_array with s_same * 256 entries, each entry sizeof (usize_t).  An array whose
+ * size parameter is not positive is skipped.  Returns 0 on success, or ENOMEM as soon
+ * as one allocation fails (a later array is then not attempted). */
+static int
+xd3_alloc_cache (xd3_stream *stream)
+{
+  if (stream->acache.s_near > 0)
+    {
+      stream->acache.near_array = xd3_alloc (stream, stream->acache.s_near, sizeof (usize_t));
+
+      if (stream->acache.near_array == NULL)
+	{
+	  return ENOMEM;
+	}
+    }
+
+  if (stream->acache.s_same > 0)
+    {
+      stream->acache.same_array = xd3_alloc (stream, stream->acache.s_same * 256, sizeof (usize_t));
+
+      if (stream->acache.same_array == NULL)
+	{
+	  return ENOMEM;
+	}
+    }
+
+  return 0;
+}
+
+/* Resets the address cache to its initial state: zeroes every near_array and
+ * same_array entry and rewinds next_slot to 0.  An array whose size parameter is not
+ * positive is left untouched (and next_slot is only reset when s_near > 0). */
+static void
+xd3_init_cache (xd3_addr_cache* acache)
+{
+  if (acache->s_same > 0)
+    {
+      memset (acache->same_array, 0, acache->s_same * 256 * sizeof (usize_t));
+    }
+
+  if (acache->s_near > 0)
+    {
+      acache->next_slot = 0;
+      memset (acache->near_array, 0, acache->s_near * sizeof (usize_t));
+    }
+}
+
+/* Records ADDR in the address cache.  When s_near > 0 the address is written to
+ * near_array[next_slot] and next_slot advances circularly modulo s_near.  When
+ * s_same > 0 the address is also written to same_array at index
+ * addr % (s_same * 256), overwriting whatever was there. */
+static void
+xd3_update_cache (xd3_addr_cache* acache, usize_t addr)
+{
+  if (acache->s_near > 0)
+    {
+      acache->near_array[acache->next_slot] = addr;
+      acache->next_slot = (acache->next_slot + 1) % acache->s_near;
+    }
+
+  if (acache->s_same > 0)
+    {
+      acache->same_array[addr % (acache->s_same*256)] = addr;
+    }
+}
+
+#if XD3_ENCODER
+/* OPT: this gets called a lot, can it be optimized? */
+/* Chooses an address mode for the copy address ADDR (with HERE the current position),
+ * emits the encoded address to the ADDR_TAIL section, updates the address cache and
+ * adds the chosen mode number to *MODE.  Returns 0, or the error from the emit call.
+ *
+ * Candidates are tried in order: VCD_SELF (addr itself), VCD_HERE (here - addr), then
+ * each near_array slot (addr - slot value, mode i+2).  As soon as a candidate fits in
+ * 7 bits SMALLEST_INT jumps to `good', which skips the remaining candidates and the
+ * same-cache probe.  Otherwise, if same_array holds ADDR at index
+ * addr % (s_same * 256), a single byte is emitted with a mode past the near modes.
+ *
+ * Note: *MODE is incremented by the mode number, not assigned.
+ * Note: SMALLEST_INT is #defined inside this function and is not #undef'd here. */
+static int
+xd3_encode_address (xd3_stream *stream, usize_t addr, usize_t here, uint8_t* mode)
+{
+  usize_t d, bestd;
+  int   i, bestm, ret;
+  xd3_addr_cache* acache = & stream->acache;
+
+#define SMALLEST_INT(x) do { if (((x) & ~127) == 0) { goto good; } } while (0)
+
+  /* Attempt to find the address mode that yields the smallest integer value for "d", the
+   * encoded address value, thereby minimizing the encoded size of the address. */
+  bestd = addr;
+  bestm = VCD_SELF;
+
+  XD3_ASSERT (addr < here);
+
+  SMALLEST_INT (bestd);
+
+  if ((d = here-addr) < bestd)
+    {
+      bestd = d;
+      bestm = VCD_HERE;
+
+      SMALLEST_INT (bestd);
+    }
+
+  for (i = 0; i < acache->s_near; i += 1)
+    {
+      d = addr - acache->near_array[i];
+
+      /* NOTE(review): if usize_t is unsigned (not visible here) `d >= 0' is always true
+       * and a slot above addr wraps to a large d that the `d < bestd' test rejects --
+       * confirm the type. */
+      if (d >= 0 && d < bestd)
+	{
+	  bestd = d;
+	  bestm = i+2; /* 2 counts the VCD_SELF, VCD_HERE modes */
+
+	  SMALLEST_INT (bestd);
+	}
+    }
+
+  if (acache->s_same > 0 && acache->same_array[d = addr%(acache->s_same*256)] == addr)
+    {
+      bestd = d%256;
+      bestm = acache->s_near + 2 + d/256; /* 2 + s_near offsets past the VCD_NEAR modes */
+
+      if ((ret = xd3_emit_byte (stream, & ADDR_TAIL (stream), bestd))) { return ret; }
+    }
+  else
+    {
+      /* Target of SMALLEST_INT: emit bestd as a variable-length size. */
+    good:
+
+      if ((ret = xd3_emit_size (stream, & ADDR_TAIL (stream), bestd))) { return ret; }
+    }
+
+  xd3_update_cache (acache, addr);
+
+  IF_DEBUG (xd3_count_mode (stream, bestm));
+
+  (*mode) += bestm;
+
+  return 0;
+}
+#endif
+
+/* Decodes one copy address from the buffer [*INPP, MAX) according to MODE, stores it
+ * through VALP, advances *INPP and records the address in the address cache.
+ *
+ * Modes below 2 + s_near read a variable-length integer with xd3_read_size:
+ * VCD_SELF uses it as is, VCD_HERE subtracts it from HERE, and any other mode adds
+ * near_array[mode - 2].  The remaining modes read a single byte B and look up
+ * same_array[(mode - (2 + s_near)) * 256 + B].
+ *
+ * Returns 0 on success, the error from xd3_read_size, or EINVAL ("address underflow")
+ * when a same-mode byte is needed but *INPP has reached MAX.
+ *
+ * NOTE(review): nothing here checks that a same-cache mode is below s_same, or that
+ * the VCD_HERE value does not exceed HERE; presumably the caller validates MODE and
+ * the resulting address -- confirm. */
+static int
+xd3_decode_address (xd3_stream *stream, usize_t here, uint mode, const uint8_t **inpp, const uint8_t *max, uint32_t *valp)
+{
+  int ret;
+  uint same_start = 2 + stream->acache.s_near;
+
+  if (mode < same_start)
+    {
+      if ((ret = xd3_read_size (stream, inpp, max, valp))) { return ret; }
+
+      switch (mode)
+	{
+	case VCD_SELF:
+	  break;
+	case VCD_HERE:
+	  (*valp) = here - (*valp);
+	  break;
+	default:
+	  (*valp) += stream->acache.near_array[mode - 2];
+	  break;
+	}
+    }
+  else
+    {
+      if (*inpp == max)
+	{
+	  stream->msg = "address underflow";
+	  return EINVAL;
+	}
+
+      mode -= same_start;
+
+      (*valp) = stream->acache.same_array[mode*256 + (**inpp)];
+
+      (*inpp) += 1;
+    }
+
+  xd3_update_cache (& stream->acache, *valp);
+
+  return 0;
+}
+
+/******************************************************************************************
+ Alloc/free
+ ******************************************************************************************/
+
+/* Default allocator installed by xd3_config_stream when the client supplies none.
+ * Allocates room for ITEMS elements of SIZE bytes each with malloc; OPAQUE is unused.
+ * The byte count is computed in size_t and checked for wrap-around, so a request whose
+ * product does not fit returns NULL instead of a block smaller than asked for. */
+static void*
+__xd3_alloc_func (void* opaque, usize_t items, usize_t size)
+{
+  size_t count = (size_t) items;
+  size_t width = (size_t) size;
+
+  /* (size_t) -1 is the largest size_t value; reject products that would exceed it. */
+  if (width != 0 && count > ((size_t) -1) / width)
+    {
+      return NULL;
+    }
+
+  return malloc (count * width);
+}
+
+/* Default deallocator installed by xd3_config_stream when the client supplies none:
+ * releases ADDRESS with free ().  OPAQUE is unused. */
+static void
+__xd3_free_func (void* opaque, void* address)
+{
+  free (address);
+}
+
+/* Allocates ELTS elements of SIZE bytes through the stream's alloc callback, passing
+ * stream->opaque along.  On failure stream->msg is set to "out of memory" and NULL is
+ * returned; on success the debug allocation counter is bumped (IF_DEBUG) and the block
+ * is returned. */
+static void*
+xd3_alloc (xd3_stream *stream,
+	   usize_t      elts,
+	   usize_t      size)
+{
+  void *mem = stream->alloc (stream->opaque, elts, size);
+
+  if (mem == NULL)
+    {
+      stream->msg = "out of memory";
+      return NULL;
+    }
+
+  IF_DEBUG (stream->alloc_cnt += 1);
+
+  return mem;
+}
+
+/* Releases PTR through the stream's free callback, passing stream->opaque along.  A
+ * NULL PTR is ignored.  For non-NULL pointers the debug free counter is bumped
+ * (IF_DEBUG) and asserted not to exceed the allocation counter. */
+static void
+xd3_free (xd3_stream *stream,
+	  void       *ptr)
+{
+  if (ptr == NULL)
+    {
+      return;
+    }
+
+  IF_DEBUG (stream->free_cnt += 1);
+  XD3_ASSERT (stream->free_cnt <= stream->alloc_cnt);
+  stream->free (stream->opaque, ptr);
+}
+
+#if XD3_ENCODER
+/* Like xd3_alloc, but the returned block of ELTS * SIZE bytes is zero-filled.  Returns
+ * NULL (leaving the message set by xd3_alloc) when the allocation fails. */
+static void*
+xd3_alloc0 (xd3_stream *stream,
+	    usize_t      elts,
+	    usize_t      size)
+{
+  void *mem = xd3_alloc (stream, elts, size);
+
+  if (mem == NULL)
+    {
+      return NULL;
+    }
+
+  memset (mem, 0, elts * size);
+
+  return mem;
+}
+
+/* Produces an empty output page and, when OLD_OUTPUT is non-NULL, links it after
+ * OLD_OUTPUT.  A page is taken from the stream's enc_free list when one is available;
+ * otherwise a new xd3_output and an XD3_ALLOCSIZE-byte buffer are allocated.  The
+ * returned page has next == 0 and next_page == NULL.  Returns NULL when an allocation
+ * fails (a page header allocated before a failed buffer allocation is freed again). */
+static xd3_output*
+xd3_alloc_output (xd3_stream *stream,
+		  xd3_output *old_output)
+{
+  xd3_output *page = stream->enc_free;
+
+  if (page != NULL)
+    {
+      /* Recycle the first page of the free list. */
+      stream->enc_free = page->next_page;
+    }
+  else
+    {
+      uint8_t *buffer;
+
+      page = xd3_alloc (stream, 1, sizeof (xd3_output));
+
+      if (page == NULL)
+	{
+	  return NULL;
+	}
+
+      buffer = xd3_alloc (stream, XD3_ALLOCSIZE, sizeof (uint8_t));
+
+      if (buffer == NULL)
+	{
+	  xd3_free (stream, page);
+	  return NULL;
+	}
+
+      page->base  = buffer;
+      page->avail = XD3_ALLOCSIZE;
+    }
+
+  page->next = 0;
+
+  if (old_output != NULL)
+    {
+      old_output->next_page = page;
+    }
+
+  page->next_page = NULL;
+
+  return page;
+}
+
+/* Returns the number of bytes written to the page chain starting at OUTPUT: the sum
+ * of `next' over every page reachable through next_page.  A NULL chain yields 0. */
+static usize_t
+xd3_sizeof_output (xd3_output *output)
+{
+  usize_t total = 0;
+
+  while (output)
+    {
+      total += output->next;
+      output = output->next_page;
+    }
+
+  return total;
+}
+
+/* Moves every page of the chain starting at OUTPUT onto the stream's enc_free list for
+ * later reuse.  Each page has `next' reset to 0 and is pushed at the head of the list,
+ * so the pages end up on the free list in reverse chain order. */
+static void
+xd3_freelist_output (xd3_stream *stream,
+		     xd3_output *output)
+{
+  while (output)
+    {
+      /* Remember the rest of the chain before relinking this page. */
+      xd3_output *rest = output->next_page;
+
+      output->next      = 0;
+      output->next_page = stream->enc_free;
+      stream->enc_free  = output;
+
+      output = rest;
+    }
+}
+
+/* Frees every page of the chain starting at OUTPUT: for each page the data buffer and
+ * then the page header are released with xd3_free.  A NULL chain is a no-op. */
+static void
+xd3_free_output (xd3_stream *stream,
+		 xd3_output *output)
+{
+  while (output != NULL)
+    {
+      /* Read the link before the page header is released. */
+      xd3_output *following = output->next_page;
+
+      xd3_free (stream, output->base);
+      xd3_free (stream, output);
+
+      output = following;
+    }
+}
+#endif /* XD3_ENCODER */
+
+/* Releases everything owned by STREAM and then zeroes the whole xd3_stream structure.
+ * Each buffer is handed to xd3_free, which ignores NULL pointers, so fields that were
+ * never allocated are harmless.  In debug builds the final assertion checks that the
+ * allocation and free counters match. */
+void
+xd3_free_stream (xd3_stream *stream)
+{
+
+  /* String-matching tables and the instruction-optimization buffer. */
+  xd3_free (stream, stream->large_table);
+  xd3_free (stream, stream->small_table);
+  xd3_free (stream, stream->small_prev);
+  xd3_free (stream, stream->iopt.buffer);
+
+#if XD3_ENCODER
+  {
+    /* Encoder output page chains, including the list of recycled pages. */
+    int i;
+    for (i = 0; i < ENC_SECTS; i += 1)
+      {
+	xd3_free_output (stream, stream->enc_heads[i]);
+      }
+    xd3_free_output (stream, stream->enc_free);
+  }
+#endif
+
+  /* Address cache arrays. */
+  xd3_free (stream, stream->acache.near_array);
+  xd3_free (stream, stream->acache.same_array);
+
+  xd3_free (stream, stream->inst_sect.copied1);
+  xd3_free (stream, stream->addr_sect.copied1);
+  xd3_free (stream, stream->data_sect.copied1);
+
+  xd3_free (stream, stream->dec_buffer);
+  xd3_free (stream, (uint8_t*) stream->dec_lastwin);
+
+  xd3_free (stream, stream->buf_in);
+  xd3_free (stream, stream->dec_appheader);
+  xd3_free (stream, stream->dec_codetbl);
+  xd3_free (stream, stream->code_table_alloc);
+
+#if SECONDARY_ANY
+  xd3_free (stream, stream->inst_sect.copied2);
+  xd3_free (stream, stream->addr_sect.copied2);
+  xd3_free (stream, stream->data_sect.copied2);
+
+  /* NOTE(review): destroy is called for all three secondary streams whenever sec_type
+   * is set; presumably each destroy tolerates a NULL stream -- confirm. */
+  if (stream->sec_type != NULL)
+    {
+      stream->sec_type->destroy (stream, stream->sec_stream_d);
+      stream->sec_type->destroy (stream, stream->sec_stream_i);
+      stream->sec_type->destroy (stream, stream->sec_stream_a);
+    }
+#endif
+
+  /* Debug-only statistics tables. */
+  IF_DEBUG (xd3_free (stream, stream->i_freqs));
+  IF_DEBUG (xd3_free (stream, stream->i_modes));
+  IF_DEBUG (xd3_free (stream, stream->i_sizes));
+
+  XD3_ASSERT (stream->alloc_cnt == stream->free_cnt);
+
+  /* Leaves the structure all-zero, which also clears the alloc/free callbacks. */
+  memset (stream, 0, sizeof (xd3_stream));
+}
+
+#if (XD3_DEBUG || VCDIFF_TOOLS)
+/* Returns a fixed-width, five-character name for the instruction type TYPE.  NOOP, RUN
+ * and ADD always map to their own names.  Every other value is reported as a copy:
+ * "CPY  " when PRINT_MODE is zero, otherwise "CPY_0" .. "CPY_9" for XD3_CPY + 0..9 and
+ * "CPY>9" for anything else. */
+static const char*
+xd3_rtype_to_string (xd3_rtype type, int print_mode)
+{
+  static const char *const cpy_names[10] =
+    {
+      "CPY_0", "CPY_1", "CPY_2", "CPY_3", "CPY_4",
+      "CPY_5", "CPY_6", "CPY_7", "CPY_8", "CPY_9"
+    };
+
+  if (type == XD3_NOOP) { return "NOOP "; }
+  if (type == XD3_RUN)  { return "RUN  "; }
+  if (type == XD3_ADD)  { return "ADD  "; }
+
+  if (! print_mode)
+    {
+      return "CPY  ";
+    }
+
+  if (type >= XD3_CPY && type <= XD3_CPY + 9)
+    {
+      return cpy_names[type - XD3_CPY];
+    }
+
+  return "CPY>9";
+}
+#endif
+
+/******************************************************************************************
+ Stream configuration
+ ******************************************************************************************/
+
+/* Initializes STREAM from CONFIG.  A NULL CONFIG is replaced by an all-zero local
+ * configuration, so every setting takes its default.  STREAM is zeroed first, before
+ * any validation, so its previous contents are lost even when an error is returned.
+ *
+ * Zero-valued size fields in CONFIG select the XD3_DEFAULT_* values, and missing
+ * alloc/free callbacks select the malloc/free based defaults.  The function then
+ * validates compile-time integer sizes, the secondary-compressor flags, the code-table
+ * flag and sprevsz, and finally selects the string matcher.
+ *
+ * Returns 0 on success, or EINVAL with stream->msg describing the problem. */
+int
+xd3_config_stream(xd3_stream *stream,
+		   xd3_config *config)
+{
+  int ret;
+  xd3_config defcfg;
+  const xd3_smatcher* smatcher;
+
+  if (config == NULL)
+    {
+      config = & defcfg;
+      memset (config, 0, sizeof (*config));
+    }
+
+  /* Initial setup: no error checks yet */
+  memset (stream, 0, sizeof (*stream));
+
+  stream->memsize   = config->memsize   ? config->memsize : XD3_DEFAULT_MEMSIZE;
+  stream->winsize   = config->winsize   ? config->winsize : XD3_DEFAULT_WINSIZE;
+  stream->sprevsz   = config->sprevsz   ? config->sprevsz : XD3_DEFAULT_SPREVSZ;
+  stream->srcwin_size  = config->srcwin_size ? config->srcwin_size : XD3_DEFAULT_START_CKSUM_ADVANCE;
+  stream->srcwin_maxsz = config->srcwin_maxsz ? config->srcwin_maxsz : XD3_DEFAULT_MAX_CKSUM_ADVANCE;
+  stream->iopt_size = config->iopt_size ? config->iopt_size : XD3_DEFAULT_IOPT_SIZE;
+  stream->getblk    = config->getblk;
+  stream->alloc     = config->alloc ? config->alloc : __xd3_alloc_func;
+  stream->free      = config->freef ? config->freef : __xd3_free_func;
+  stream->opaque    = config->opaque;
+  stream->flags     = config->flags;
+
+  XD3_ASSERT (stream->winsize > 0);
+
+  /* Secondary setup. */
+  stream->sec_data  = config->sec_data;
+  stream->sec_inst  = config->sec_inst;
+  stream->sec_addr  = config->sec_addr;
+
+  /* The data_type tags are always overwritten, whatever CONFIG supplied. */
+  stream->sec_data.data_type = DATA_SECTION;
+  stream->sec_inst.data_type = INST_SECTION;
+  stream->sec_addr.data_type = ADDR_SECTION;
+
+  /* Check static sizes. */
+  /* Note: a non-power-of-two XD3_ALLOCSIZE is reported with the same "wrong integer
+   * sizes" message as a typedef size mismatch. */
+  if (sizeof (usize_t) != SIZEOF_USIZE_T ||
+      sizeof (xoff_t) != SIZEOF_XOFF_T ||
+      (ret = xd3_check_pow2(XD3_ALLOCSIZE, NULL)))
+    {
+      stream->msg = "incorrect compilation: wrong integer sizes";
+      return EINVAL;
+    }
+
+  /* Check/set secondary compressor. */
+  /* NOTE(review): FGK_CASE and DJW_CASE are defined elsewhere; presumably each ends its
+   * case with a break or return, otherwise control would fall through to the default
+   * error -- confirm. */
+  switch (stream->flags & XD3_SEC_TYPE)
+    {
+    case 0:
+      if (stream->flags & XD3_SEC_OTHER)
+	{
+	  stream->msg = "XD3_SEC flags require a secondary compressor type";
+	  return EINVAL;
+	}
+      break;
+    case XD3_SEC_FGK:
+      FGK_CASE (stream);
+    case XD3_SEC_DJW:
+      DJW_CASE (stream);
+    default:
+      stream->msg = "too many secondary compressor types set";
+      return EINVAL;
+    }
+
+  /* Check/set encoder code table. */
+  switch (stream->flags & XD3_ALT_CODE_TABLE) {
+  case 0:
+    stream->code_table_desc = & __rfc3284_code_table_desc;
+    stream->code_table_func = xd3_rfc3284_code_table;
+    break;
+#if GENERIC_ENCODE_TABLES
+  case XD3_ALT_CODE_TABLE:
+    stream->code_table_desc = & __alternate_code_table_desc;
+    stream->code_table_func = xd3_alternate_code_table;
+    stream->comp_table_func = xd3_compute_alternate_table_encoding;
+    break;
+#endif
+  default:
+    stream->msg = "alternate code table support was not compiled";
+    return EINVAL;
+  }
+
+  /* Check sprevsz */
+  /* A small_chain of exactly 1 disables the small-prev table (sprevsz = 0); otherwise
+   * sprevsz must be a power of two so that sprevmask = sprevsz - 1 works as a mask. */
+  if (config->small_chain == 1)
+    {
+      stream->sprevsz = 0;
+    }
+  else
+    {
+      if ((ret = xd3_check_pow2 (stream->sprevsz, NULL)))
+	{
+	  stream->msg = "sprevsz is required to be a power of two";
+	  return EINVAL;
+	}
+
+      stream->sprevmask = stream->sprevsz - 1;
+    }
+
+  /* Default scanner settings. */
+  /* NOTE(review): in the XD3_SMATCH_SOFT case the `break;' directly after the smatcher
+   * assignment comes before the validation `if', so as written the soft-config
+   * validation looks unreachable.  That validation also reads stream->large_look,
+   * which is still 0 at this point because XD3_COPY_CONFIG_FIELDS runs later.
+   * Depends on how IF_BUILD_SOFT expands -- confirm and fix deliberately. */
+  switch (config->smatch_cfg)
+    {
+      IF_BUILD_SOFT(case XD3_SMATCH_SOFT:
+      smatcher = & __smatcher_soft; break;
+
+      if (config->large_look  < MIN_MATCH ||
+	  config->large_step  < 1         ||
+	  config->small_look  < MIN_MATCH ||
+	  config->small_chain < 1         ||
+	  config->large_look  < config->small_look ||
+	  config->small_chain < config->small_lchain ||
+	  (config->small_lchain == 0 && config->try_lazy) ||
+	  config->srcwin_size < stream->large_look ||
+	  config->srcwin_maxsz < stream->srcwin_size)
+	{
+	  stream->msg = "invalid soft string-match config";
+	  return EINVAL;
+	}
+      break;)
+
+      IF_BUILD_SLOW(case XD3_SMATCH_DEFAULT:)
+      IF_BUILD_SLOW(case XD3_SMATCH_SLOW: smatcher = & __smatcher_slow; break;)
+      IF_BUILD_FAST(case XD3_SMATCH_FAST: smatcher = & __smatcher_fast; break;)
+    default:
+      stream->msg = "invalid string match config type";
+      return EINVAL;
+    }
+
+  stream->string_match  = smatcher->string_match;
+  XD3_ASSERT(stream->string_match);
+
+  XD3_COPY_CONFIG_FIELDS (stream, smatcher);
+
+  /* If it is a soft config, the smatcher fields didn't set anything, copy from config
+   * instead. */
+  if (stream->large_look == 0)
+    {
+      XD3_COPY_CONFIG_FIELDS (stream, config);
+    }
+
+  IF_DEBUG1 (P(RINT "[stream cfg] llook %u lstep %u slook %u\n",
+	       stream->large_look, stream->large_step, stream->small_look));
+  return 0;
+}
+
+/******************************************************************************************
+ Getblk interface
+ ******************************************************************************************/
+
+/* This function interfaces with the client getblk function, checks its results, etc.
+ *
+ * Makes source block BLKNO the current block of stream->src.  Returns EINVAL when BLKNO
+ * is not below source->blocks.  If the block is not already current (or no block is
+ * loaded) the request is recorded in source->getblkno and either the stream's getblk
+ * callback is invoked, or -- when no callback is set -- XD3_GETSRCBLK is returned so
+ * the caller can supply the block.  A non-zero callback result is returned unchanged.
+ * Finally the block length source->onblk must equal xd3_bytes_on_srcblk (source, blkno),
+ * otherwise EINVAL is returned.  Returns 0 on success. */
+static int
+xd3_getblk (xd3_stream *stream/*, xd3_source *source*/, xoff_t blkno)
+{
+  int ret;
+  xd3_source *source = stream->src;
+
+  if (blkno >= source->blocks)
+    {
+      stream->msg = "source file too short";
+      return EINVAL;
+    }
+
+  if (blkno != source->curblkno || source->curblk == NULL)
+    {
+      /* Restates the enclosing condition, so it cannot fail here. */
+      XD3_ASSERT (source->curblk != NULL || blkno != source->curblkno);
+
+      source->getblkno = blkno;
+
+      if (stream->getblk == NULL)
+	{
+	  stream->msg = "getblk source input";
+	  return XD3_GETSRCBLK;
+	}
+      else if ((ret = stream->getblk (stream, source, blkno)) != 0)
+	{
+	  stream->msg = "getblk failed";
+	  return ret;
+	}
+
+      XD3_ASSERT (source->curblk != NULL);
+    }
+
+  /* This length check also runs when the block was already current. */
+  if (source->onblk != xd3_bytes_on_srcblk (source, blkno))
+    {
+      stream->msg = "getblk returned short block";
+      return EINVAL;
+    }
+
+  return 0;
+}
+
+/******************************************************************************************
+ Stream open/close
+ ******************************************************************************************/
+
+/* Attaches the source SRC to STREAM and initializes its derived fields: the block count
+ * (src->size divided by src->blksize, rounded up) and a zero srclen and srcbase.
+ * A NULL SRC, or a source shorter than stream->large_look, is silently ignored and the
+ * stream is left without a source.  Always returns 0.
+ *
+ * The NULL check comes before the debug trace, which reads src->size.
+ * NOTE(review): src->blksize is used as a divisor without a check; presumably the
+ * caller guarantees it is non-zero -- confirm. */
+int
+xd3_set_source (xd3_stream *stream,
+		xd3_source *src)
+{
+  xoff_t blk_num;
+  xoff_t tail_size;
+
+  if (src == NULL) { return 0; }
+
+  IF_DEBUG1 (P(RINT "[set source] size %"Q"u\n", src->size));
+
+  if (src->size < stream->large_look) { return 0; }
+
+  stream->src  = src;
+  blk_num      = src->size / src->blksize;
+  tail_size    = src->size % src->blksize;
+  src->blocks  = blk_num + (tail_size > 0);
+  src->srclen  = 0;
+  src->srcbase = 0;
+
+  return 0;
+}
+
+/* Marks STREAM as aborted in both directions by setting the encoder state to
+ * ENC_ABORTED and the decoder state to DEC_ABORTED. */
+void
+xd3_abort_stream (xd3_stream *stream)
+{
+  stream->enc_state = ENC_ABORTED;
+  stream->dec_state = DEC_ABORTED;
+}
+
+/* Checks that STREAM is in a state where it may be closed.  Returns 0 when it is, or
+ * EINVAL with stream->msg set when work is still pending.
+ *
+ * A stream with an active encoder state (neither 0 nor ENC_ABORTED) must be waiting for
+ * input (ENC_INPUT) with no input left.  Otherwise the decoder state decides: the
+ * header / window-indicator states and the aborted state are acceptable, anything else
+ * means the input ended inside a window. */
+int
+xd3_close_stream (xd3_stream *stream)
+{
+  if (stream->enc_state != 0 && stream->enc_state != ENC_ABORTED)
+    {
+      /* If encoding, should be ready for more input but not actually have any. */
+      if (stream->enc_state == ENC_INPUT && stream->avail_in == 0)
+	{
+	  return 0;
+	}
+
+      stream->msg = "encoding is incomplete";
+      return EINVAL;
+    }
+
+  switch (stream->dec_state)
+    {
+    case DEC_VCHEAD:
+    case DEC_WININD:
+      /* TODO: Address the zero-byte ambiguity.  Does the encoder emit a window or
+       * not?  If so, then catch an error here.  If not, need another routine to say
+       * decode_at_least_one_if_empty. */
+    case DEC_ABORTED:
+      return 0;
+    default:
+      break;
+    }
+
+  /* If decoding, should be ready for the next window. */
+  stream->msg = "EOF in decode";
+  return EINVAL;
+}
+
+/******************************************************************************************
+ Application header
+ ******************************************************************************************/
+
+/* Hands back the application header seen by the decoder: *DATA receives
+ * stream->dec_appheader and *SIZE receives stream->dec_appheadsz.  Returns 0 on
+ * success.  While the decoder state is still before DEC_WININD the header is not yet
+ * available: EINVAL is returned with stream->msg set and the outputs are untouched. */
+int
+xd3_get_appheader (xd3_stream  *stream,
+		   uint8_t    **data,
+		   usize_t      *size)
+{
+  if (stream->dec_state >= DEC_WININD)
+    {
+      (*data) = stream->dec_appheader;
+      (*size) = stream->dec_appheadsz;
+      return 0;
+    }
+
+  stream->msg = "application header not available";
+  return EINVAL;
+}
+
+#if XD3_ENCODER
+/* Records the application header for the encoder: stores the DATA pointer and SIZE in
+ * stream->enc_appheader / stream->enc_appheadsz.  Only the pointer is stored, the bytes
+ * are not copied here.
+ * NOTE(review): presumably DATA must stay valid until the header has been emitted --
+ * confirm against the encoder. */
+void
+xd3_set_appheader (xd3_stream    *stream,
+		   const uint8_t *data,
+		   usize_t         size)
+{
+  stream->enc_appheader = data;
+  stream->enc_appheadsz = size;
+}
+
+/******************************************************************************************
+ Encoder stuff
+ ******************************************************************************************/
+
+#if XD3_DEBUG
+/* Debug consistency check for the instruction buffer: returns non-zero when the
+ * lengths of the used and free lists, plus one if stream->iout is set, add up to
+ * stream->iopt_size. */
+static int
+xd3_iopt_check (xd3_stream *stream)
+{
+  int used_len = xd3_rlist_length (& stream->iopt.used);
+  int free_len = xd3_rlist_length (& stream->iopt.free);
+  int held     = used_len + free_len;
+
+  if (stream->iout)
+    {
+      held += 1;
+    }
+
+  return held == stream->iopt_size;
+}
+#endif
+
+/* Unlinks instruction I from the list it is on, appends it to the stream's iopt.free
+ * list, and returns the value xd3_rlist_remove returned for it.
+ * NOTE(review): xd3_rlist_remove is defined elsewhere; presumably it returns the
+ * element that followed I -- confirm. */
+static xd3_rinst*
+xd3_iopt_free (xd3_stream *stream, xd3_rinst *i)
+{
+  xd3_rinst *n = xd3_rlist_remove (i);
+  xd3_rlist_push_back (& stream->iopt.free, i);
+  return n;
+}
+
+/* Returns instruction I to the stream's iopt.free list unless it is an XD3_ADD, which
+ * is left alone.  Unlike xd3_iopt_free, the instruction is not unlinked from any list
+ * first. */
+static void
+xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i)
+{
+  if (i->type == XD3_ADD)
+    {
+      return;
+    }
+
+  xd3_rlist_push_back (& stream->iopt.free, i);
+}
+
+/* When an instruction is ready to flush from the iopt buffer, this function is called to
+ * produce an encoding.  It writes the instruction plus size, address, and data to the
+ * various encoding sections.
+ *
+ * Per type: XD3_CPY encodes its address with xd3_encode_address, XD3_RUN emits its run
+ * byte (inst->xtra) to the data section, and XD3_ADD copies inst->size input bytes to
+ * the data section.  stream->unencoded_offset is then advanced by inst->size.
+ *
+ * The instruction code itself is emitted one step late: INST is paired with the
+ * pending instruction stream->iout through XD3_CHOOSE_INSTRUCTION.  If a double code
+ * was chosen (iout->code2 != 0) both are emitted together and iout is cleared;
+ * otherwise the pending one is emitted alone and INST becomes the new pending
+ * instruction.  Returns 0, or the first error from a helper. */
+static int
+xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
+{
+  int ret;
+
+  /* Check for input overflow. */
+  XD3_ASSERT (inst->pos + inst->size <= stream->avail_in);
+
+  switch (inst->type)
+    {
+    case XD3_CPY:
+      {
+	/* the address may have an offset if there is a source window. */
+	usize_t addr;
+	xd3_source *src = stream->src;
+
+	if (src != NULL)
+	  {
+	    /* If there is a source copy, the source must have its source window decided
+	     * before we can encode.  This can be bad -- we have to make this decision
+	     * even if no source matches have been found. */
+	    if (stream->srcwin_decided == 0)
+	      {
+		if ((ret = xd3_srcwin_setup (stream))) { return ret; }
+	      }
+
+	    /* xtra field indicates the copy is from the source */
+	    if (inst->xtra)
+	      {
+		XD3_ASSERT (inst->addr >= src->srcbase);
+		XD3_ASSERT (inst->addr + inst->size <= src->srcbase + src->srclen);
+		addr = (inst->addr - src->srcbase);
+	      }
+	    else
+	      {
+		/* with source window: target copy address is offset by taroff. */
+		addr = stream->taroff + (usize_t) inst->addr;
+	      }
+	  }
+	else
+	  {
+	    addr = (usize_t) inst->addr;
+	  }
+
+	XD3_ASSERT (inst->size >= MIN_MATCH);
+
+	/* the "here" position is always offset by taroff */
+	/* xd3_encode_address adds the chosen address mode to inst->type in place. */
+	if ((ret = xd3_encode_address (stream, addr, inst->pos + stream->taroff, & inst->type)))
+	  {
+	    return ret;
+	  }
+
+	IF_DEBUG (stream->n_cpy += 1);
+	IF_DEBUG (stream->l_cpy += inst->size);
+
+	IF_DEBUG1 ({
+	  static int cnt;
+	  P(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %u\n",
+		   cnt++,
+		   stream->total_in + inst->pos,
+		   stream->total_in + inst->pos + inst->size,
+		   inst->addr, inst->addr + inst->size, inst->size);
+	});
+	break;
+      }
+    case XD3_RUN:
+      {
+	XD3_ASSERT (inst->size >= MIN_MATCH);
+
+	/* For a run, xtra holds the repeated byte; it is the only data emitted. */
+	if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { return ret; }
+
+	IF_DEBUG (stream->n_run += 1);
+	IF_DEBUG (stream->l_run += inst->size);
+	IF_DEBUG (stream->n_dbytes += 1);
+
+	IF_DEBUG1 ({
+	  static int cnt;
+	  P(RINT "[iopt run:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size);
+	});
+	break;
+      }
+    case XD3_ADD:
+      {
+	/* The added bytes are copied straight from the input window. */
+	if ((ret = xd3_emit_bytes (stream, & DATA_TAIL (stream),
+				   stream->next_in + inst->pos, inst->size))) { return ret; }
+
+	IF_DEBUG (stream->n_add += 1);
+	IF_DEBUG (stream->l_add += inst->size);
+	IF_DEBUG (stream->n_dbytes += inst->size);
+
+	IF_DEBUG1 ({
+	  static int cnt;
+	  P(RINT "[iopt add:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size);
+	});
+
+	break;
+      }
+    }
+
+  /* This is the only place stream->unencoded_offset is incremented. */
+  XD3_ASSERT (stream->unencoded_offset == inst->pos);
+  stream->unencoded_offset += inst->size;
+
+  IF_DEBUG (stream->n_emit += inst->size);
+
+  inst->code2 = 0;
+
+  /* NOTE(review): XD3_CHOOSE_INSTRUCTION is defined elsewhere; presumably it fills in
+   * inst->code1 and, when a double instruction is possible, stream->iout->code2 --
+   * confirm. */
+  XD3_CHOOSE_INSTRUCTION (stream, stream->iout, inst);
+
+  if (stream->iout != NULL)
+    {
+      if (stream->iout->code2 != 0)
+	{
+	  if ((ret = xd3_emit_double (stream, stream->iout, inst, stream->iout->code2))) { return ret; }
+
+	  xd3_iopt_free_nonadd (stream, stream->iout);
+	  xd3_iopt_free_nonadd (stream, inst);
+	  stream->iout = NULL;
+	  return 0;
+	}
+      else
+	{
+	  if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; }
+
+	  xd3_iopt_free_nonadd (stream, stream->iout);
+	}
+    }
+
+  /* INST stays pending until the next call (or xd3_iopt_add_finalize) emits it. */
+  stream->iout = inst;
+
+  return 0;
+}
+
+/* Encodes an ADD covering the not-yet-encoded input in [unencoded_offset, POS), if that
+ * range is non-empty.  The caller-provided IADD is filled in and passed to
+ * xd3_iopt_finish_encoding, which may keep a pointer to it as the pending instruction;
+ * IADD must therefore stay alive (on the caller's stack) until the following call to
+ * xd3_iopt_finish_encoding or the final emit.  Returns 0 when nothing needs adding,
+ * otherwise the result of xd3_iopt_finish_encoding. */
+static int
+xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd)
+{
+  usize_t start = stream->unencoded_offset;
+
+  if (! (pos > start))
+    {
+      return 0;
+    }
+
+  iadd->type = XD3_ADD;
+  iadd->pos  = start;
+  iadd->size = pos - start;
+
+  return xd3_iopt_finish_encoding (stream, iadd);
+}
+
+/* Finishes encoding INST.  Any unmatched input before inst->pos is first covered by a
+ * synthesized ADD (held in a local that lives until INST itself has been encoded), then
+ * INST is passed to xd3_iopt_finish_encoding.  Returns 0 or the first error. */
+static int
+xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst)
+{
+  xd3_rinst iadd;
+  int ret = xd3_iopt_add (stream, inst->pos, & iadd);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  return xd3_iopt_finish_encoding (stream, inst);
+}
+
+/* Generates a final ADD for whatever input remains unencoded (up to stream->avail_in)
+ * and then emits the pending instruction stream->iout, if any, as a single
+ * instruction, leaving iout NULL.  Returns 0 or the first error from a helper. */
+static int
+xd3_iopt_add_finalize (xd3_stream *stream)
+{
+  xd3_rinst iadd;
+  int ret = xd3_iopt_add (stream, stream->avail_in, & iadd);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  if (! stream->iout)
+    {
+      return 0;
+    }
+
+  /* Flush the last pending instruction; nothing follows to pair it with. */
+  ret = xd3_emit_single (stream, stream->iout, stream->iout->code1);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  xd3_iopt_free_nonadd (stream, stream->iout);
+  stream->iout = NULL;
+
+  return 0;
+}
+
+/* Compact the instruction buffer by choosing the best non-overlapping instructions when
+ * lazy string-matching.  There are no ADDs in the iopt buffer because those are
+ * synthesized in xd3_iopt_add_encoding and during xd3_iopt_add_finalize.
+ *
+ * The first loop walks adjacent pairs (r1, r2) of stream->iopt.used and resolves every
+ * overlap, either by discarding one instruction or by trimming the tail of r1 and/or the
+ * head of r2.  The second loop pops instructions off the front of the list and passes
+ * each one to xd3_iopt_add_encoding.
+ *
+ * force: when non-zero, overlaps are resolved through the end of the list and the whole
+ *   list is encoded (asserted at the end).  When zero, overlap resolution stops at the
+ *   first overlapping pair that has no successor r3, and encoding stops once more than
+ *   iopt_size / 2 instructions were flushed or fewer than three instructions remain.
+ *
+ * Returns 0 on success, otherwise the error returned by xd3_iopt_add_encoding. */
+static int
+xd3_iopt_flush_instructions (xd3_stream *stream, int force)
+{
+  xd3_rinst *r1 = xd3_rlist_front (& stream->iopt.used);
+  xd3_rinst *r2;
+  xd3_rinst *r3;
+  usize_t r1end;
+  usize_t r2end;
+  usize_t r2off;
+  usize_t r2moff;
+  usize_t gap;
+  usize_t flushed;
+  int ret;
+
+  XD3_ASSERT (xd3_iopt_check (stream));
+
+  /* Note: once tried to skip this step if it's possible to assert there are no
+   * overlapping instructions.  Doesn't work because xd3_iopt_erase leaves overlapping
+   * instructions. */
+  while (! xd3_rlist_end (& stream->iopt.used, r1) &&
+	 ! xd3_rlist_end (& stream->iopt.used, r2 = xd3_rlist_next (r1)))
+    {
+      r1end = r1->pos + r1->size;
+
+      /* If the instructions do not overlap, continue. */
+      if (r1end <= r2->pos)
+	{
+	  r1 = r2;
+	  continue;
+	}
+
+      r2end = r2->pos + r2->size;
+
+      /* The min_match adjustments prevent this. */
+      XD3_ASSERT (r2end > (r1end + LEAST_MATCH_INCR));
+
+      /* If r3 is available... */
+      if (! xd3_rlist_end (& stream->iopt.used, r3 = xd3_rlist_next (r2)))
+	{
+	  /* If r3 starts before r1 finishes or just about, r2 is irrelevant */
+	  if (r3->pos <= r1end + 1)
+	    {
+	      /* r1 is unchanged, so the next iteration compares r1 with r2's successor. */
+	      xd3_iopt_free (stream, r2);
+	      continue;
+	    }
+	}
+      else if (! force)
+	{
+	  /* Unless force, end the loop when r3 is not available. */
+	  break;
+	}
+
+      /* r2off:  how far r2 starts after r1 starts.
+       * r2moff: how far r2 extends beyond the end of r1.
+       * gap:    total span covered by r1 and r2 together. */
+      r2off  = r2->pos - r1->pos;
+      r2moff = r2end - r1end;
+      gap    = r2end - r1->pos;
+
+      /* If the two matches overlap almost entirely, choose the better match and discard
+       * the other.  This heuristic is BLACK MAGIC.  Have something better? */
+      if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2)
+	{
+	  /* Only one match should be used, choose the longer one. */
+	  if (r1->size < r2->size)
+	    {
+	      xd3_iopt_free (stream, r1);
+	      r1 = r2;
+	    }
+	  else
+	    {
+	      /* We are guaranteed that r1 does not overlap now, so advance past r2 */
+	      r1 = xd3_iopt_free (stream, r2);
+	    }
+	  continue;
+	}
+      else
+	{
+	  /* Shorten one of the instructions -- could be optimized based on the address
+	   * cache. */
+	  usize_t average;
+	  usize_t newsize;
+	  usize_t adjust1;
+
+	  XD3_ASSERT (r1end > r2->pos && r2end > r1->pos);
+
+	  /* Try to balance the length of both instructions, but avoid making both longer
+	   * than MAX_MATCH_SPLIT . */
+	  average = (gap) / 2;
+	  newsize = min (MAX_MATCH_SPLIT, gap - average);
+
+	  /* Should be possible to simplify this code. */
+	  if (newsize > r1->size)
+	    {
+	      /* shorten r2 */
+	      /* r1 keeps its length; r2 gives up the entire overlapping region. */
+	      adjust1 = r1end - r2->pos;
+	    }
+	  else if (newsize > r2->size)
+	    {
+	      /* shorten r1 */
+	      /* r1 gives up the entire overlapping region; r2 stays intact. */
+	      adjust1 = r1end - r2->pos;
+
+	      XD3_ASSERT (r1->size > adjust1);
+
+	      r1->size -= adjust1;
+
+	      /* don't shorten r2 */
+	      adjust1 = 0;
+	    }
+	  else
+	    {
+	      /* shorten r1 */
+	      /* Aim for r1->size == newsize ... */
+	      adjust1 = r1->size - newsize;
+
+	      /* ... but never cut r1 back past the start of r2, which would open a hole
+	       * between the two instructions. */
+	      if (r2->pos > r1end - adjust1)
+		{
+		  adjust1 -= r2->pos - (r1end - adjust1);
+		}
+
+	      XD3_ASSERT (r1->size > adjust1);
+
+	      r1->size -= adjust1;
+
+	      /* shorten r2 */
+	      XD3_ASSERT (r1->pos + r1->size >= r2->pos);
+
+	      /* Whatever overlap remains is removed from the head of r2 below. */
+	      adjust1 = r1->pos + r1->size - r2->pos;
+	    }
+
+	  /* Fallthrough above if-else, shorten r2 */
+	  XD3_ASSERT (r2->size > adjust1);
+
+	  /* Trimming the head of r2 moves its target position and its copy address forward
+	   * by the same amount. */
+	  r2->size -= adjust1;
+	  r2->pos  += adjust1;
+	  r2->addr += adjust1;
+
+	  XD3_ASSERT (r1->size >= MIN_MATCH);
+	  XD3_ASSERT (r2->size >= MIN_MATCH);
+
+	  r1 = r2;
+	}
+    }
+
+  XD3_ASSERT (xd3_iopt_check (stream));
+
+  /* If forcing, pick instructions until the list is empty, otherwise this empties 50% of
+   * the queue. */
+  for (flushed = 0; ! xd3_rlist_empty (& stream->iopt.used); )
+    {
+      xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt.used);
+      if ((ret = xd3_iopt_add_encoding (stream, renc)))
+	{
+	  return ret;
+	}
+
+      if (! force)
+	{
+	  if (++flushed > stream->iopt_size / 2)
+	    {
+	      break;
+	    }
+
+	  /* If there are only two instructions remaining, break, because they were
+	   * not optimized.  This means there were more than 50% eliminated by the
+	   * loop above. */
+ 	  r1 = xd3_rlist_front (& stream->iopt.used);
+ 	  if (xd3_rlist_end(& stream->iopt.used, r1) ||
+ 	      xd3_rlist_end(& stream->iopt.used, r2 = xd3_rlist_next (r1)) ||
+ 	      xd3_rlist_end(& stream->iopt.used, r3 = xd3_rlist_next (r2)))
+ 	    {
+ 	      break;
+ 	    }
+	}
+    }
+
+  XD3_ASSERT (xd3_iopt_check (stream));
+
+  XD3_ASSERT (!force || xd3_rlist_length (& stream->iopt.used) == 0);
+
+  return 0;
+}
+
+/* Hands out an unused instruction slot through IPTR, moving it from the free list to
+ * the back of the used list.  When no free slot exists, a non-forced flush of the
+ * instruction buffer is performed first to make room.  Returns 0 or the flush error. */
+static int
+xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr)
+{
+  xd3_rinst *slot;
+
+  if (xd3_rlist_empty (& stream->iopt.free))
+    {
+      int ret = xd3_iopt_flush_instructions (stream, 0);
+
+      if (ret != 0)
+        {
+          return ret;
+        }
+
+      XD3_ASSERT (! xd3_rlist_empty (& stream->iopt.free));
+    }
+
+  slot = xd3_rlist_pop_back (& stream->iopt.free);
+  xd3_rlist_push_back (& stream->iopt.used, slot);
+  *iptr = slot;
+
+  return 0;
+}
+
+/* A copy is about to be emitted that extends backwards to POS, therefore it may
+ * completely cover some existing instructions in the buffer.  Scanning from the back of
+ * the used list, every instruction that starts at or after POS is erased (moved back to
+ * the free list); the scan stops at the first instruction that starts before POS.
+ * This function returns nothing, and SIZE is only consulted by the debug assertions. */
+static void
+xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size)
+{
+  while (! xd3_rlist_empty (& stream->iopt.used))
+    {
+      xd3_rinst *r = xd3_rlist_back (& stream->iopt.used);
+
+      /* Verify that greedy is working.  The previous instruction should end before the
+       * new one begins. */
+      XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos));
+      /* Verify that min_match is working.  The previous instruction should end before the
+       * new one ends. */
+      XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size));
+
+      /* See if the last instruction starts before the new instruction.  If so, there is
+       * nothing to erase. */
+      if (r->pos < pos)
+	{
+	  return;
+	}
+
+      /* Otherwise, the new instruction covers the old one, delete it and repeat. */
+      xd3_rlist_remove (r);
+      xd3_rlist_push_back (& stream->iopt.free, r);
+    }
+}
+
+/* Reports the input position just past the most recently buffered instruction, i.e.
+ * pos + size of the last entry in the used list, or 0 when the used list is empty. */
+static usize_t
+xd3_iopt_last_matched (xd3_stream *stream)
+{
+  xd3_rinst *last;
+
+  if (! xd3_rlist_empty (& stream->iopt.used))
+    {
+      last = xd3_rlist_back (& stream->iopt.used);
+
+      return last->pos + last->size;
+    }
+
+  return 0;
+}
+
+/******************************************************************************************
+ Emit routines
+ ******************************************************************************************/
+
+/* Writes one single-instruction opcode CODE to the instruction section.  When the code
+ * table entry has size1 == 0 the size is not implied by the opcode, so single->size is
+ * written after it.  Returns 0 or the error from the emit helpers. */
+static int
+xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint code)
+{
+  int ret;
+  int explicit_size = (stream->code_table[code].size1 == 0);
+
+  IF_DEBUG1 (P(RINT "[emit1] %u %s (%u) code %u\n",
+               single->pos,
+               xd3_rtype_to_string (single->type, 0),
+               single->size,
+               code));
+
+  ret = xd3_emit_byte (stream, & INST_TAIL (stream), code);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  if (explicit_size)
+    {
+      ret = xd3_emit_size (stream, & INST_TAIL (stream), single->size);
+
+      if (ret != 0)
+        {
+          return ret;
+        }
+
+      IF_DEBUG (xd3_count_size (stream, single->size));
+    }
+
+  IF_DEBUG (xd3_count_inst (stream, code));
+
+  return 0;
+}
+
+/* Writes one double-instruction opcode CODE to the instruction section.  Both halves of
+ * a double instruction have sizes fixed by the code table, so only the opcode byte is
+ * written.  FIRST and SECOND are used for debug output only.  Returns 0 or the error
+ * from xd3_emit_byte. */
+static int
+xd3_emit_double (xd3_stream *stream, xd3_rinst *first, xd3_rinst *second, uint code)
+{
+  int ret;
+
+  /* A double opcode must carry both sizes implicitly. */
+  XD3_ASSERT (stream->code_table[code].size1 != 0 &&
+              stream->code_table[code].size2 != 0);
+
+  ret = xd3_emit_byte (stream, & INST_TAIL (stream), code);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  IF_DEBUG1 (P(RINT "[emit2]: %u %s (%u) %s (%u) code %u\n",
+               first->pos,
+               xd3_rtype_to_string (first->type, 0),
+               first->size,
+               xd3_rtype_to_string (second->type, 0),
+               second->size,
+               code));
+
+  IF_DEBUG (xd3_count_inst (stream, code));
+
+  return 0;
+}
+
+/* Records a candidate RUN instruction of SIZE bytes of value RUN_C in the iopt buffer.
+ * POS is relative to the target window.  Returns 0 or the error from
+ * xd3_iopt_get_slot. */
+static INLINE int
+xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t run_c)
+{
+  xd3_rinst *slot;
+  int ret;
+
+  XD3_ASSERT (pos + size <= stream->avail_in);
+
+  ret = xd3_iopt_get_slot (stream, & slot);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  slot->type = XD3_RUN;
+  slot->pos  = pos;
+  slot->size = size;
+  slot->xtra = run_c;
+
+  return 0;
+}
+
+/* Records a candidate COPY instruction in the iopt buffer: SIZE bytes at target-window
+ * position POS copied from address ADDR.  IS_SOURCE is stored in the instruction's xtra
+ * field.  Returns 0 or the error from xd3_iopt_get_slot. */
+static INLINE int
+xd3_found_match (xd3_stream *stream, usize_t pos, usize_t size, xoff_t addr, int is_source)
+{
+  xd3_rinst *slot;
+  int ret;
+
+  XD3_ASSERT (pos + size <= stream->avail_in);
+
+  ret = xd3_iopt_get_slot (stream, & slot);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  slot->type = XD3_CPY;
+  slot->pos  = pos;
+  slot->size = size;
+  slot->addr = addr;
+  slot->xtra = is_source;
+
+  return 0;
+}
+
+/* Writes the header for the current window into the HDR output section.
+ *
+ * For the first window (current_window == 0) the file header is written first: the three
+ * magic bytes, the version byte, the header indicator, and then optionally the secondary
+ * compressor id, the compressed code table and the application header.
+ *
+ * For every window it then (optionally) runs secondary compression over the DATA, INST
+ * and ADDR sections and writes the window indicator, the source segment length and base
+ * (when a source was used), the encoding length, the target length, the delta indicator,
+ * the three section lengths and, when XD3_ADLER32 is set, a 4-byte checksum of the input.
+ *
+ * Returns 0 on success or the first error returned by a helper. */
+static int
+xd3_emit_hdr (xd3_stream *stream)
+{
+  int  ret;
+  int  use_secondary = stream->sec_type != NULL;
+  int  use_adler32   = stream->flags & XD3_ADLER32;
+  int  vcd_source    = xd3_encoder_used_source (stream);
+  uint win_ind = 0;
+  uint del_ind = 0;
+  usize_t enc_len;
+  usize_t tgt_len;
+  usize_t data_len;
+  usize_t inst_len;
+  usize_t addr_len;
+
+  /* Every input byte of the window must be covered by an emitted instruction. */
+  XD3_ASSERT (stream->n_emit == stream->avail_in);
+
+  if (stream->current_window == 0)
+    {
+      uint hdr_ind = 0;
+      int use_appheader  = stream->enc_appheader != NULL;
+      int use_gencodetbl = GENERIC_ENCODE_TABLES && (stream->code_table_desc != & __rfc3284_code_table_desc);
+
+      if (use_secondary)  { hdr_ind |= VCD_SECONDARY; }
+      if (use_gencodetbl) { hdr_ind |= VCD_CODETABLE; }
+      if (use_appheader)  { hdr_ind |= VCD_APPHEADER; }
+
+      if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC1)) != 0 ||
+	  (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC2)) != 0 ||
+	  (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_MAGIC3)) != 0 ||
+	  (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), VCDIFF_VERSION)) != 0 ||
+	  (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), hdr_ind)) != 0)
+	{
+	  return ret;
+	}
+
+      /* Secondary compressor ID */
+#if SECONDARY_ANY
+      if (use_secondary && (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->sec_type->id))) { return ret; }
+#endif
+
+      /* Compressed code table */
+      if (use_gencodetbl)
+	{
+	  usize_t code_table_size;
+	  const uint8_t *code_table_data;
+
+	  if ((ret = stream->comp_table_func (stream, & code_table_data, & code_table_size))) { return ret; }
+
+	  /* The emitted length counts the near_modes and same_modes bytes as well, hence
+	   * the + 2. */
+	  if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), code_table_size + 2)) ||
+	      (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->near_modes)) ||
+	      (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), stream->code_table_desc->same_modes)) ||
+	      (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), code_table_data, code_table_size))) { return ret; }
+	}
+
+      /* Application header */
+      if (use_appheader)
+	{
+	  if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->enc_appheadsz)) ||
+	      (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), stream->enc_appheader, stream->enc_appheadsz)))
+	    {
+	      return ret;
+	    }
+	}
+    }
+
+  /* try to compress this window */
+#if SECONDARY_ANY
+  if (use_secondary)
+    {
+      int data_sec = 0;
+      int inst_sec = 0;
+      int addr_sec = 0;
+
+      /* Expands to an expression that is non-zero only when secondary compression of the
+       * named section is enabled and xd3_encode_secondary failed; in that case `ret'
+       * holds the error.  The matching LOWER_sec local is passed by address to
+       * xd3_encode_secondary and is used below to build the delta indicator. */
+#     define ENCODE_SECONDARY_SECTION(UPPER,LOWER) \
+             ((stream->flags & XD3_SEC_NO ## UPPER) == 0 && \
+              (ret = xd3_encode_secondary (stream, & UPPER ## _HEAD (stream), & UPPER ## _TAIL (stream), \
+					& xd3_sec_ ## LOWER (stream), \
+				        & stream->sec_ ## LOWER, & LOWER ## _sec)))
+
+      if (ENCODE_SECONDARY_SECTION (DATA, data) ||
+	  ENCODE_SECONDARY_SECTION (INST, inst) ||
+	  ENCODE_SECONDARY_SECTION (ADDR, addr))
+	{
+	  return ret;
+	}
+
+      del_ind |= (data_sec ? VCD_DATACOMP : 0);
+      del_ind |= (inst_sec ? VCD_INSTCOMP : 0);
+      del_ind |= (addr_sec ? VCD_ADDRCOMP : 0);
+    }
+#endif
+
+  /* if (vcd_target) { win_ind |= VCD_TARGET; } */
+  if (vcd_source)  { win_ind |= VCD_SOURCE; }
+  if (use_adler32) { win_ind |= VCD_ADLER32; }
+
+  /* window indicator */
+  if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), win_ind))) { return ret; }
+
+  /* source window */
+  if (vcd_source)
+    {
+      /* or (vcd_target) { ... } */
+      /* NOTE(review): srcbase is written with xd3_emit_size; if srcbase is wider than
+       * usize_t this could truncate -- confirm the field's type. */
+      if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->src->srclen)) ||
+	  (ret = xd3_emit_size (stream, & HDR_TAIL (stream), stream->src->srcbase))) { return ret; }
+    }
+
+  /* Section lengths are measured after the optional secondary compression above. */
+  tgt_len  = stream->avail_in;
+  data_len = xd3_sizeof_output (DATA_HEAD (stream));
+  inst_len = xd3_sizeof_output (INST_HEAD (stream));
+  addr_len = xd3_sizeof_output (ADDR_HEAD (stream));
+
+  /* The enc_len field is redundant... doh! */
+  /* It covers everything written after it: one byte for the delta indicator, the four
+   * encoded length fields, the three sections and the optional 4-byte checksum. */
+  enc_len = (1 + (xd3_sizeof_size (tgt_len) +
+		  xd3_sizeof_size (data_len) +
+		  xd3_sizeof_size (inst_len) +
+		  xd3_sizeof_size (addr_len)) +
+	     data_len +
+	     inst_len +
+	     addr_len +
+	     (use_adler32 ? 4 : 0));
+
+  if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), enc_len)) ||
+      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), tgt_len)) ||
+      (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), del_ind)) ||
+      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), data_len)) ||
+      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), inst_len)) ||
+      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), addr_len)))
+    {
+      return ret;
+    }
+
+  if (use_adler32)
+    {
+      uint8_t  send[4];
+      uint32_t a32 = adler32 (1L, stream->next_in, stream->avail_in);
+
+      /* Serialize the checksum most-significant byte first. */
+      send[0] = (a32 >> 24);
+      send[1] = (a32 >> 16);
+      send[2] = (a32 >> 8);
+      send[3] = (a32 & 0xff);
+
+      if ((ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), send, 4))) { return ret; }
+    }
+
+  return 0;
+}
+
+/******************************************************************************************
+ Encode routines
+ ******************************************************************************************/
+
+/* Accumulates caller input in stream->buf_in until a full window (winsize bytes) is
+ * available or XD3_FLUSH is set.  Input left over from a previous call is taken first.
+ *
+ * Returns ENOMEM if the buffer cannot be allocated, XD3_INPUT if the buffer still has
+ * room and no flush was requested, and 0 once next_in/avail_in have been pointed at the
+ * buffered window.  Input that does not fit is remembered in buf_leftover and
+ * buf_leftavail. */
+static int
+xd3_encode_buffer_leftover (xd3_stream *stream)
+{
+  usize_t space;
+  usize_t ncopy;
+
+  /* Create the window-sized staging buffer on first use. */
+  if (stream->buf_in == NULL)
+    {
+      stream->buf_in = xd3_alloc (stream, stream->winsize, 1);
+
+      if (stream->buf_in == NULL)
+        {
+          return ENOMEM;
+        }
+    }
+
+  /* Bytes that did not fit last time go to the front of the buffer. */
+  if (stream->buf_leftover != NULL)
+    {
+      XD3_ASSERT (stream->buf_avail == 0);
+      XD3_ASSERT (stream->buf_leftavail < stream->winsize);
+
+      IF_DEBUG1 (P(RINT "[leftover] previous %u avail %u\n", stream->buf_leftavail, stream->avail_in));
+
+      memcpy (stream->buf_in, stream->buf_leftover, stream->buf_leftavail);
+
+      stream->buf_avail    = stream->buf_leftavail;
+      stream->buf_leftover = NULL;
+    }
+
+  /* Append as much of the new input as the buffer can still hold. */
+  space = stream->winsize - stream->buf_avail;
+  ncopy = min (space, stream->avail_in);
+
+  memcpy (stream->buf_in + stream->buf_avail, stream->next_in, ncopy);
+
+  stream->buf_avail += ncopy;
+
+  if (ncopy < stream->avail_in)
+    {
+      /* The buffer filled up: remember where the unread input starts. */
+      stream->buf_leftover  = stream->next_in  + ncopy;
+      stream->buf_leftavail = stream->avail_in - ncopy;
+
+      IF_DEBUG1 (P(RINT "[leftover] take %u remaining %u\n", ncopy, stream->buf_leftavail));
+    }
+  else if (! (stream->flags & XD3_FLUSH) && (stream->buf_avail < stream->winsize))
+    {
+      /* Still room and no flush requested: ask the caller for more input. */
+      IF_DEBUG1 (P(RINT "[leftover] %u emptied\n", ncopy));
+      return XD3_INPUT;
+    }
+
+  /* Present the buffered bytes as the window to encode. */
+  stream->next_in   = stream->buf_in;
+  stream->avail_in  = stream->buf_avail;
+  stream->buf_avail = 0;
+
+  return 0;
+}
+
+/* One-time encoder setup: sizes the checksum hash tables, allocates one output page per
+ * encoder section, builds the iopt free list and initializes the address cache settings
+ * and the code table.  Returns ENOMEM when an allocation fails, otherwise the result of
+ * xd3_alloc_cache. */
+static int
+xd3_encode_init (xd3_stream *stream)
+{
+  int i;
+  int use_large = (stream->src != NULL);
+  int use_small = ! (stream->flags & XD3_NOCOMPRESS);
+  /*int small_prev = (stream->small_chain > 1);*/
+  int n_tables  = (use_large + use_small);
+  int memsize   = stream->memsize;
+
+  /* Only the table sizes are computed here.  The tables themselves are allocated in
+   * xd3_string_match_init on the first call to string_match, so identical or short
+   * inputs require no table allocation. */
+  if (use_large)
+    {
+      xd3_size_hashtable (stream, memsize / n_tables, & stream->large_hash);
+    }
+
+  if (use_small)
+    {
+      xd3_size_hashtable (stream, memsize / n_tables, & stream->small_hash);
+    }
+
+  /* One output page per section; head and tail start out on the same page. */
+  for (i = 0; i < ENC_SECTS; i += 1)
+    {
+      stream->enc_tails[i] = xd3_alloc_output (stream, NULL);
+      stream->enc_heads[i] = stream->enc_tails[i];
+
+      if (stream->enc_heads[i] == NULL)
+        {
+          return ENOMEM;
+        }
+    }
+
+  /* iopt buffer: every slot starts on the free list. */
+  xd3_rlist_init (& stream->iopt.used);
+  xd3_rlist_init (& stream->iopt.free);
+
+  stream->iopt.buffer = xd3_alloc (stream, sizeof (xd3_rinst), stream->iopt_size);
+
+  if (stream->iopt.buffer == NULL)
+    {
+      return ENOMEM;
+    }
+
+  for (i = 0; i < stream->iopt_size; i += 1)
+    {
+      xd3_rlist_push_back (& stream->iopt.free, & stream->iopt.buffer[i]);
+    }
+
+  XD3_ASSERT (xd3_rlist_length (& stream->iopt.free) == stream->iopt_size);
+  XD3_ASSERT (xd3_rlist_length (& stream->iopt.used) == 0);
+
+  /* address cache, code table */
+  stream->acache.s_near = stream->code_table_desc->near_modes;
+  stream->acache.s_same = stream->code_table_desc->same_modes;
+  stream->code_table    = stream->code_table_func ();
+
+  return xd3_alloc_cache (stream);
+}
+
+#if XD3_DEBUG
+/* Debug-only consistency check: asserts that every entry of stream->small_prev is
+ * correctly linked to both of its neighbours.  Always returns 1 so that it can be used
+ * inside an XD3_ASSERT expression. */
+static int
+xd3_check_sprevlist (xd3_stream *stream)
+{
+  int idx = 0;
+
+  while (idx < stream->sprevsz)
+    {
+      xd3_slist *node = & stream->small_prev[idx];
+
+      XD3_ASSERT (node->prev->next == node);
+      XD3_ASSERT (node->next->prev == node);
+
+      idx += 1;
+    }
+
+  return 1;
+}
+#endif
+
+/* Called after the ENC_POSTOUT state to prepare for the next window.  The output pages,
+ * which were spliced into a single chain during the ENC_FLUSH state, are handed back one
+ * per section as empty pages; any pages beyond the first ENC_SECTS are passed to
+ * xd3_freelist_output.  Per-window input and source-window variables are cleared. */
+static void
+xd3_encode_reset (xd3_stream *stream)
+{
+  int i;
+  xd3_output *olist;
+
+  XD3_ASSERT (stream->small_prev == NULL || xd3_check_sprevlist (stream));
+
+  IF_DEBUG (stream->n_emit = 0);
+  stream->small_reset  = 1;
+  stream->avail_in     = 0;
+
+  if (stream->src != NULL)
+    {
+      stream->srcwin_decided = 0;
+      stream->match_minaddr  = 0;
+      stream->match_maxaddr  = 0;
+      stream->taroff         = 0;
+      stream->src->srcbase   = 0;
+      stream->src->srclen    = 0;
+    }
+
+  /* Walk the spliced chain, detaching one emptied page for each section. */
+  olist = stream->enc_heads[0];
+
+  for (i = 0; i < ENC_SECTS; i += 1)
+    {
+      xd3_output *page = olist;
+
+      XD3_ASSERT (page != NULL);
+
+      olist = page->next_page;
+
+      page->next      = 0;
+      page->next_page = NULL;
+
+      stream->enc_heads[i] = page;
+      stream->enc_tails[i] = page;
+    }
+
+  /* Whatever remains of the chain is no longer needed by any section. */
+  xd3_freelist_output (stream, olist);
+}
+
+/* The main encoding routine.  It is a re-entrant state machine driven by
+ * stream->enc_state: the caller invokes it repeatedly and reacts to the returned code.
+ *
+ * Return values visible in this function:
+ *   XD3_INPUT      more input is needed (also returned after a window is complete)
+ *   XD3_WINSTART   a window of input has been accepted and is about to be encoded
+ *   XD3_OUTPUT     next_out/avail_out describe output to be consumed
+ *   XD3_WINFINISH  all output for the current window has been returned
+ *   EINVAL         misuse (decoder already active, unconsumed output, invalid state)
+ *   otherwise      an error propagated from a helper routine. */
+int
+xd3_encode_input (xd3_stream *stream)
+{
+  int ret, i;
+
+  if (stream->dec_state != 0)
+    {
+      stream->msg = "encoder/decoder transition";
+      return EINVAL;
+    }
+
+  switch (stream->enc_state)
+    {
+    case ENC_INIT:
+      /* Only reached on first time through: memory setup. */
+      if ((ret = xd3_encode_init (stream))) { return ret; }
+
+      stream->enc_state = ENC_INPUT;
+
+      /* Deliberate fall through into ENC_INPUT. */
+    case ENC_INPUT:
+
+      /* If there is no input yet, just return.  This checks for next_in == NULL, not
+       * avail_in == 0 since zero bytes is a valid input.  There is an assertion in
+       * xd3_avail_input() that next_in != NULL for this reason.  By returning right away
+       * we avoid creating an input buffer before the caller has supplied its first data.
+       * It is possible for xd3_avail_input to be called both before and after the first
+       * call to xd3_encode_input(). */
+      if (stream->next_in == NULL)
+	{
+	  return XD3_INPUT;
+	}
+
+    enc_flush:
+      /* See if we should buffer the input: either if there is already a leftover buffer,
+       * or if the input is short of winsize without flush.  The label at this point is
+       * reached by a goto below, when there is leftover input after postout. */
+      if ((stream->buf_leftover != NULL) ||
+	  (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH)))
+	{
+	  if ((ret = xd3_encode_buffer_leftover (stream))) { return ret; }
+	}
+
+      /* Initialize the address cache before each window. */
+      xd3_init_cache (& stream->acache);
+
+      /* NOTE(review): pos_in and min_match are not declared in this function; presumably
+       * they are macros or globals defined elsewhere in the file -- confirm. */
+      pos_in    = 0;
+      min_match = MIN_MATCH;
+      stream->unencoded_offset = 0;
+
+      stream->enc_state = ENC_SEARCH;
+
+      IF_DEBUG1 (P(RINT "[input window:%"Q"u] input bytes %u offset %"Q"u\n",
+		   stream->current_window, stream->avail_in, stream->total_in));
+
+      return XD3_WINSTART;
+
+    case ENC_SEARCH:
+
+      /* Reentrant matching. */
+      if (stream->src != NULL)
+	{
+	  switch (stream->match_state)
+	    {
+	    case MATCH_TARGET:
+	      /* Try matching forward at the start of the target.  This is entered the
+	       * first time through, to check for a perfect match, and whenever there is a
+	       * source match that extends to the end of the previous window.  The
+	       * match_srcpos field is initially zero and later set during
+	       * xd3_source_extend_match. */
+	      if (stream->avail_in > 0) {
+		/* This call can't fail because the source window is unrestricted. */
+		ret = xd3_source_match_setup (stream, stream->match_srcpos);
+		XD3_ASSERT (ret == 0);
+		stream->match_state = MATCH_FORWARD;
+	      } else {
+		stream->match_state = MATCH_SEARCHING;
+	      }
+	      XD3_ASSERT (stream->match_fwd == 0);
+
+	      /* Deliberate fall through. */
+	    case MATCH_FORWARD:
+	    case MATCH_BACKWARD:
+	      if (stream->avail_in != 0)
+		{
+		  /* A non-zero return here is handed straight back to the caller; the
+		   * state is unchanged so a later call re-enters this case. */
+		  if ((ret = xd3_source_extend_match (stream)) != 0)
+		    {
+		      return ret;
+		    }
+
+		  stream->input_position += stream->match_fwd;
+		}
+
+	      /* Deliberate fall through. */
+	    case MATCH_SEARCHING:
+	      /* Continue string matching.  (It's possible that the initial match
+	       * continued through the entire input, in which case we're still in
+	       * MATCH_FORWARD and should remain so for the next input window.) */
+	      break;
+	    }
+	}
+
+      /* String matching... */
+      if (stream->avail_in != 0 &&
+	  (ret = stream->string_match (stream)))
+	{
+	  return ret;
+	}
+
+      /* Flush the instruction buffer, then possibly add one more instruction, then emit
+       * the header. */
+      stream->enc_state = ENC_FLUSH;
+      if ((ret = xd3_iopt_flush_instructions (stream, 1)) ||
+          (ret = xd3_iopt_add_finalize (stream)) ||
+	  (ret = xd3_emit_hdr (stream)))
+	{
+	  return ret;
+	}
+
+      /* Begin output. */
+      stream->enc_current = HDR_HEAD (stream);
+
+      /* Chain all the outputs together.  After doing this, it looks as if there is only
+       * one section.  The other enc_heads are set to NULL to avoid freeing them more than
+       * once. */
+       for (i = 1; i < ENC_SECTS; i += 1)
+	{
+	  stream->enc_tails[i-1]->next_page = stream->enc_heads[i];
+	  stream->enc_heads[i] = NULL;
+	}
+
+    enc_output:
+
+      /* Expose the current output page to the caller. */
+      stream->enc_state  = ENC_POSTOUT;
+      stream->next_out   = stream->enc_current->base;
+      stream->avail_out  = stream->enc_current->next;
+      stream->total_out += (xoff_t) stream->avail_out;
+
+      /* If there is any output in this buffer, return it, otherwise fall through to
+       * handle the next buffer or finish the window after all buffers have been
+       * output. */
+      if (stream->avail_out > 0)
+	{
+	  /* This is the only place xd3_encode returns XD3_OUTPUT */
+	  return XD3_OUTPUT;
+	}
+
+      /* Deliberate fall through into ENC_POSTOUT when the page was empty. */
+    case ENC_POSTOUT:
+
+      /* The caller must have consumed the previous page before calling again. */
+      if (stream->avail_out != 0)
+	{
+	  stream->msg = "missed call to consume output";
+	  return EINVAL;
+	}
+
+      /* Continue outputting one buffer at a time, until the next is NULL. */
+      if ((stream->enc_current = stream->enc_current->next_page) != NULL)
+	{
+	  goto enc_output;
+	}
+
+      stream->total_in += (xoff_t) stream->avail_in;
+      stream->enc_state = ENC_POSTWIN;
+
+      return XD3_WINFINISH;
+
+    case ENC_POSTWIN:
+
+      xd3_encode_reset (stream);
+
+      stream->current_window += 1;
+      stream->enc_state = ENC_INPUT;
+
+      /* If there is leftover input to flush, repeat. */
+      if ((stream->buf_leftover != NULL) && (stream->flags & XD3_FLUSH))
+	{
+	  goto enc_flush;
+	}
+
+      /* Ready for more input. */
+      return XD3_INPUT;
+
+    default:
+      stream->msg = "invalid state";
+      return EINVAL;
+    }
+}
+#endif /* XD3_ENCODER */
+
+/******************************************************************************************
+ Client convenience functions
+ ******************************************************************************************/
+
+/* This function invokes either encode or decode to and from in-memory arrays.  The output
+ * array must be large enough to hold the output or else ENOSPC is returned.
+ *
+ * stream        the stream to drive; XD3_FLUSH is set on it
+ * func          xd3_encode_input or xd3_decode_input
+ * close_stream  when non-zero, xd3_close_stream is called on success and its result
+ *               becomes the return value
+ * input, input_size   the complete input
+ * output, avail_size  the output array and its capacity in bytes
+ * output_size   receives the number of bytes written to output
+ *
+ * Returns 0 on success, ENOSPC if the output does not fit, EINVAL if the stream asks for
+ * source data (XD3_GETSRCBLK), or any other error returned by func. */
+static int
+xd3_process_completely (xd3_stream    *stream,
+			int          (*func) (xd3_stream *),
+			int            close_stream,
+			const uint8_t *input,
+			usize_t         input_size,
+			uint8_t       *output,
+			usize_t        *output_size,
+			usize_t         avail_size)
+{
+  (*output_size) = 0;
+
+  stream->flags |= XD3_FLUSH;
+
+  xd3_avail_input (stream, input, input_size);
+
+  for (;;)
+    {
+      int ret;
+      switch((ret = func (stream)))
+	{
+	case XD3_OUTPUT: { /* memcpy below */ break; }
+	case XD3_INPUT: { /* this means EOF */ goto done; }
+	case XD3_GOTHEADER: { /* ignore */ continue; }
+	case XD3_WINSTART: { /* ignore */ continue; }
+	case XD3_WINFINISH: { /* ignore */ continue; }
+	case XD3_GETSRCBLK:
+	  {
+	    stream->msg = "stream requires source input";
+	    return EINVAL;
+	  }
+	case 0: /* there is no plain "success" return for xd3_encode/decode */
+	  XD3_ASSERT (ret != 0);
+	  /* fall through: with assertions disabled the value is simply returned */
+	default:
+	  return ret;
+	}
+
+      /* *output_size never exceeds avail_size here, so the subtraction cannot wrap.  The
+       * former test (*output_size + avail_out > avail_size) could overflow usize_t and
+       * then wrongly accept output that does not fit. */
+      if (stream->avail_out > avail_size - *output_size)
+	{
+	  stream->msg = "insufficient output space";
+	  return ENOSPC;
+	}
+
+      memcpy (output + *output_size, stream->next_out, stream->avail_out);
+
+      *output_size += stream->avail_out;
+
+      xd3_consume_output (stream);
+    }
+ done:
+  return (close_stream == 0) ? 0 : xd3_close_stream (stream);
+}
+
+/* Decodes INPUT entirely in memory into OUTPUT (capacity AVAIL_SIZE bytes), storing the
+ * number of bytes produced in *OUTPUT_SIZE, and closes the stream afterwards.  The
+ * return value is that of xd3_process_completely. */
+int
+xd3_decode_completely (xd3_stream    *stream,
+		       const uint8_t *input,
+		       usize_t         input_size,
+		       uint8_t       *output,
+		       usize_t        *output_size,
+		       usize_t         avail_size)
+{
+  const int close_when_done = 1;
+
+  return xd3_process_completely (stream, xd3_decode_input, close_when_done,
+				 input, input_size,
+				 output, output_size, avail_size);
+}
+
+#if XD3_ENCODER
+/* Encodes INPUT entirely in memory into OUTPUT (capacity AVAIL_SIZE bytes), storing the
+ * number of bytes produced in *OUTPUT_SIZE, and closes the stream afterwards.  The
+ * return value is that of xd3_process_completely. */
+int
+xd3_encode_completely (xd3_stream    *stream,
+		       const uint8_t *input,
+		       usize_t         input_size,
+		       uint8_t       *output,
+		       usize_t        *output_size,
+		       usize_t         avail_size)
+{
+  const int close_when_done = 1;
+
+  return xd3_process_completely (stream, xd3_encode_input, close_when_done,
+				 input, input_size,
+				 output, output_size, avail_size);
+}
+#endif
+
+/******************************************************************************************
+ DECODE stuff
+ ******************************************************************************************/
+
+/* Returns non-zero (the VCD_SOURCE bit of the current window indicator) if the caller
+ * must provide a source.  Theoretically, this has to be checked after every window: the
+ * first window may require no source while the second one does.  In practice? */
+int
+xd3_decoder_needs_source (xd3_stream *stream)
+{
+  int source_bit = stream->dec_win_ind & VCD_SOURCE;
+
+  return source_bit;
+}
+
+/* Initialize the decoder for a new window: clears the copy-window length and offset and
+ * the checksum byte counter, and re-initializes the address cache.  Always returns 0.
+ *
+ * The dec_tgtlen value is preserved across successive window decodings (it is not touched
+ * here), and the update to dec_winstart is delayed until a new window actually starts.
+ * This is to avoid throwing an error due to overflow until the last possible moment,
+ * which makes it possible to encode exactly 4GB through a 32-bit encoder. */
+static int
+xd3_decode_init_window (xd3_stream *stream)
+{
+  /* Per-window copy segment state. */
+  stream->dec_cpylen     = 0;
+  stream->dec_cpyoff     = 0;
+
+  /* Checksum bytes read so far for this window. */
+  stream->dec_cksumbytes = 0;
+
+  xd3_init_cache (& stream->acache);
+
+  return 0;
+}
+
+/* Allocates buffer space for the target window and possibly the VCD_TARGET copy-window.
+ * Also sets the base of the two copy segments (dec_cpyaddrbase and dec_tgtaddrbase).
+ *
+ * Returns 0 on success, EINVAL if a VCD_TARGET window refers to data before the last
+ * target window, or ENOMEM if the output buffer cannot be allocated. */
+static int
+xd3_decode_setup_buffers (xd3_stream *stream)
+{
+  /* If VCD_TARGET is set then the previous buffer may be reused. */
+  if (stream->dec_win_ind & VCD_TARGET)
+    {
+      /* But this implementation only supports copying from the last target window.  If the
+       * offset is outside that range, it can't be done. */
+      /* NOTE(review): only the lower bound is checked here; whether
+       * dec_cpyoff + dec_cpylen stays inside the last window is not verified in this
+       * function -- confirm it is checked elsewhere. */
+      if (stream->dec_cpyoff < stream->dec_laststart)
+	{
+	  stream->msg = "unsupported VCD_TARGET offset";
+	  return EINVAL;
+	}
+
+      /* See if the two windows are the same.  This indicates the first time VCD_TARGET is
+       * used.  This causes a second buffer to be allocated, after that the two are
+       * swapped in the DEC_FINISH case. */
+      if (stream->dec_lastwin == stream->next_out)
+	{
+	  stream->next_out  = NULL;
+	  stream->space_out = 0;
+	}
+
+      stream->dec_cpyaddrbase = stream->dec_lastwin + (usize_t) (stream->dec_cpyoff - stream->dec_laststart);
+    }
+
+  /* See if the current output window is large enough. */
+  if (stream->space_out < stream->dec_tgtlen)
+    {
+      /* NOTE(review): this frees dec_buffer unconditionally.  If dec_lastwin still
+       * points into dec_buffer (the first-time VCD_TARGET case above), dec_cpyaddrbase
+       * would refer to freed memory -- confirm how DEC_FINISH manages the two buffers. */
+      xd3_free (stream, stream->dec_buffer);
+
+      stream->space_out = xd3_round_blksize (stream->dec_tgtlen, XD3_ALLOCSIZE);
+
+      if ((stream->dec_buffer = xd3_alloc (stream, stream->space_out, 1)) == NULL)
+	{
+	  return ENOMEM;
+	}
+
+      stream->next_out = stream->dec_buffer;
+    }
+
+  /* dec_tgtaddrbase refers to an invalid base address, but it is always used with a
+   * sufficiently large instruction offset (i.e., beyond the copy window).  This condition
+   * is enforced by xd3_decode_output_halfinst. */
+  stream->dec_tgtaddrbase = stream->next_out - stream->dec_cpylen;
+
+  return 0;
+}
+
+/* Makes sure *COPIED1 points to a buffer of at least SIZE bytes, recording its capacity
+ * in *ALLOC1.  An existing buffer that is too small is freed first.  When a new buffer is
+ * needed and SECONDARY_ANY is enabled, the spare buffer *COPIED2 (capacity *ALLOC2) is
+ * taken over instead of allocating, but only if it is large enough; ownership moves to
+ * *COPIED1 and *COPIED2 is cleared.  COPIED2 may be NULL.
+ *
+ * Returns 0 on success or ENOMEM if the allocation fails. */
+static int
+xd3_decode_allocate (xd3_stream  *stream,
+		     usize_t       size,
+		     uint8_t    **copied1,
+		     usize_t      *alloc1,
+		     uint8_t    **copied2,
+		     usize_t      *alloc2)
+{
+  if (*copied1 != NULL && *alloc1 < size)
+    {
+      xd3_free (stream, *copied1);
+      *copied1 = NULL;
+    }
+
+  if (*copied1 == NULL)
+    {
+#if SECONDARY_ANY
+      /* Borrow from the secondary compressor's allocation, provided it can hold SIZE
+       * bytes.  (The test used to be `*alloc2 < size', which adopted the buffer exactly
+       * when it was too small and let the caller write past its end.) */
+      if (copied2 != NULL && *copied2 != NULL && *alloc2 >= size)
+	{
+	  *copied1 = *copied2;
+	  *alloc1  = *alloc2;
+	  *copied2 = NULL;
+	  *alloc2  = 0;
+	}
+      else
+#endif
+	{
+	  *alloc1 = xd3_round_blksize (size, XD3_ALLOCSIZE);
+
+	  if ((*copied1 = xd3_alloc (stream, *alloc1, 1)) == NULL)
+	    {
+	      /* Keep the recorded capacity consistent with the missing buffer. */
+	      *alloc1 = 0;
+	      return ENOMEM;
+	    }
+	}
+    }
+
+  return 0;
+}
+
+/* Gathers the bytes of one window section (SECTION) from the input and, once all
+ * section->size bytes are present, switches the decoder to state NSTATE.
+ *
+ * copy  when zero and nothing has been gathered yet, the section is used in place,
+ *       directly from stream->next_in; otherwise the bytes are accumulated in
+ *       section->copied1, possibly across several calls.
+ *
+ * Returns 0 when the section is complete, XD3_INPUT when more input is required, or the
+ * error from xd3_decode_allocate. */
+static int
+xd3_decode_section (xd3_stream *stream,
+		    xd3_desect *section,
+		    xd3_decode_state nstate,
+		    int copy)
+{
+  XD3_ASSERT (section->pos <= section->size);
+  XD3_ASSERT (stream->dec_state != nstate);
+
+  if (section->pos < section->size)
+    {
+      usize_t sect_take;
+
+      if (stream->avail_in == 0)
+	{
+	  return XD3_INPUT;
+	}
+
+      if ((copy == 0) && (section->pos == 0))
+	{
+	  /* No allocation/copy needed */
+	  /* NOTE(review): this takes the whole section without comparing section->size
+	   * against stream->avail_in; presumably the caller only passes copy == 0 when
+	   * the entire section is already available -- confirm. */
+	  section->buf = stream->next_in;
+	  sect_take    = section->size;
+	}
+      else
+	{
+	  usize_t sect_need = section->size - section->pos;
+
+	  /* Allocate and copy */
+	  sect_take = min (sect_need, stream->avail_in);
+
+	  /* The buffer is set up only on the first chunk; later chunks append to it. */
+	  if (section->pos == 0)
+	    {
+	      int ret;
+
+	      if ((ret = xd3_decode_allocate (stream,
+					      section->size,
+					      & section->copied1,
+					      & section->alloc1,
+					      & section->copied2,
+					      & section->alloc2))) { return ret; }
+
+	      section->buf = section->copied1;
+	    }
+
+	  memcpy (section->copied1 + section->pos,
+		  stream->next_in,
+		  sect_take);
+	}
+
+      section->pos += sect_take;
+
+      stream->dec_winbytes += sect_take;
+
+      /* Presumably advances the stream's input pointer/count by sect_take -- the macro
+       * is defined outside this excerpt. */
+      DECODE_INPUT (sect_take);
+    }
+
+  if (section->pos < section->size)
+    {
+      stream->msg = "further input required";
+      return XD3_INPUT;
+    }
+
+  XD3_ASSERT (section->pos == section->size);
+
+  /* Section complete: record its end and rewind pos. */
+  stream->dec_state = nstate;
+  section->buf_max  = section->buf + section->size;
+  section->pos      = 0;
+  return 0;
+}
+
+/* Decode the size and address for half of an instruction (i.e., a single opcode).  This
+ * updates the stream->dec_position, which are bytes already output prior to processing
+ * this instruction.  Perform bounds checking for sizes and copy addresses, which uses the
+ * dec_position (which is why these checks are done here).
+ *
+ * The size checks are written as subtractions ("size > limit - start") rather than
+ * additions because the size is read from the input and may be arbitrarily large, so
+ * "start + size" could wrap around and pass the check.
+ *
+ * Returns 0 on success, EINVAL for an invalid size or address, or the error returned by
+ * xd3_decode_address. */
+static int
+xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst)
+{
+  int ret;
+
+  /* If the size from the instruction table is zero then read a size value. */
+  if ((inst->size == 0) &&
+      (ret = xd3_read_size (stream,
+ 			    & stream->inst_sect.buf,
+			      stream->inst_sect.buf_max,
+			    & inst->size)))
+    {
+      return EINVAL;
+    }
+
+  /* For copy instructions, read address. */
+  if (inst->type >= XD3_CPY)
+    {
+      IF_DEBUG1 ({
+	static int cnt = 0;
+	P(RINT "DECODE:%u: COPY at %"Q"u (winoffset %u) size %u winaddr %u\n",
+		 cnt++,
+		 stream->total_out + (stream->dec_position - stream->dec_cpylen),
+		 (stream->dec_position - stream->dec_cpylen),
+		 inst->size,
+		 inst->addr);
+      });
+
+      if ((ret = xd3_decode_address (stream,
+				     stream->dec_position,
+				     inst->type - XD3_CPY,
+				     & stream->addr_sect.buf,
+				     stream->addr_sect.buf_max,
+				     & inst->addr)))
+	{
+	  return ret;
+	}
+
+      /* Cannot copy an address before it is filled-in. */
+      if (inst->addr >= stream->dec_position)
+	{
+	  stream->msg = "address too large";
+	  return EINVAL;
+	}
+
+      /* Check: a VCD_TARGET or VCD_SOURCE copy cannot exceed the remaining buffer space
+       * in its own segment.  The subtraction cannot wrap because addr < dec_cpylen. */
+      if (inst->addr < stream->dec_cpylen &&
+	  inst->size > stream->dec_cpylen - inst->addr)
+	{
+	  stream->msg = "size too large";
+	  return EINVAL;
+	}
+    }
+  else
+    {
+      IF_DEBUG1 ({
+	if (inst->type == XD3_ADD)
+	  {
+	    static int cnt;
+	    P(RINT "DECODE:%d: ADD at %"Q"u (winoffset %u) size %u\n",
+		     cnt++,
+		     stream->total_out + stream->dec_position - stream->dec_cpylen,
+		     stream->dec_position - stream->dec_cpylen,
+		     inst->size);
+	  }
+	else
+	  {
+	    static int cnt;
+	    XD3_ASSERT (inst->type == XD3_RUN);
+	    P(RINT "DECODE:%d: RUN at %"Q"u (winoffset %u) size %u\n",
+		     cnt++,
+		     stream->total_out + stream->dec_position - stream->dec_cpylen,
+		     stream->dec_position - stream->dec_cpylen,
+		     inst->size);
+	  }
+      });
+    }
+
+  /* Check: The instruction will not overflow the output buffer.  dec_position never
+   * exceeds dec_maxpos (this check maintains that), so the subtraction cannot wrap. */
+  XD3_ASSERT (stream->dec_position <= stream->dec_maxpos);
+
+  if (inst->size > stream->dec_maxpos - stream->dec_position)
+    {
+      stream->msg = "size too large";
+      return EINVAL;
+    }
+
+  stream->dec_position += inst->size;
+  return 0;
+}
+
+/* Fetch the next opcode from the instruction section, load the code-table entry it
+ * selects into stream->dec_current1/dec_current2, and parse the size/address of each
+ * half that is not a NOOP.  Returns EINVAL if the instruction section is already
+ * exhausted, otherwise 0 or the error from xd3_decode_parse_halfinst. */
+static int
+xd3_decode_instruction (xd3_stream *stream)
+{
+  const xd3_dinst *entry;
+  int ret;
+
+  if (stream->inst_sect.buf == stream->inst_sect.buf_max)
+    {
+      stream->msg = "instruction underflow";
+      return EINVAL;
+    }
+
+  /* Look up the table entry for the opcode byte, then step past it. */
+  entry = stream->code_table + *stream->inst_sect.buf;
+  stream->inst_sect.buf += 1;
+
+  stream->dec_current1.type = entry->type1;
+  stream->dec_current1.size = entry->size1;
+  stream->dec_current2.type = entry->type2;
+  stream->dec_current2.size = entry->size2;
+
+  /* A code table may place a NOOP in either position (although this is unlikely), so
+   * each half is parsed only when it carries a real operation. */
+  if (entry->type1 != XD3_NOOP)
+    {
+      ret = xd3_decode_parse_halfinst (stream, & stream->dec_current1);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+    }
+
+  if (entry->type2 != XD3_NOOP)
+    {
+      ret = xd3_decode_parse_halfinst (stream, & stream->dec_current2);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+/* Output the result of a single half-instruction. OPT: This the decoder hotspot.
+ *
+ * Bytes are appended at stream->next_out + stream->avail_out and avail_out is advanced.
+ * When the half-instruction has been fully output its type is set to XD3_NOOP.  A
+ * VCD_SOURCE copy may span several source blocks; if xd3_getblk returns non-zero the
+ * instruction is left with its remaining addr/size so the call can be repeated later.
+ * Returns 0 on success, EINVAL on data underflow or a short source file, or the value
+ * returned by xd3_getblk. */
+static int
+xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst)
+{
+  /* To make this reentrant, set take = min (inst->size, available space)... */
+  usize_t take = inst->size;
+
+  XD3_ASSERT (inst->type != XD3_NOOP);
+
+  switch (inst->type)
+    {
+    case XD3_RUN:
+      {
+	/* Only require a single data byte. */
+	if (stream->data_sect.buf == stream->data_sect.buf_max)
+	  {
+	    stream->msg = "data underflow";
+	    return EINVAL;
+	  }
+
+	/* TUNE: Probably want to eliminate memset/memcpy here */
+	memset (stream->next_out + stream->avail_out,
+		stream->data_sect.buf[0],
+		take);
+
+	stream->data_sect.buf += 1;
+	stream->avail_out += take;
+	inst->type = XD3_NOOP;
+	break;
+      }
+    case XD3_ADD:
+      {
+	/* Require at least TAKE data bytes.  Compare against the number of bytes left
+	 * rather than forming buf + take, which could point beyond the end of the
+	 * section (or wrap) for a large TAKE. */
+	if (take > (usize_t) (stream->data_sect.buf_max - stream->data_sect.buf))
+	  {
+	    stream->msg = "data underflow";
+	    return EINVAL;
+	  }
+
+	memcpy (stream->next_out + stream->avail_out,
+		stream->data_sect.buf,
+		take);
+
+	stream->data_sect.buf += take;
+	stream->avail_out += take;
+	inst->type = XD3_NOOP;
+	break;
+      }
+    default:
+      {
+	usize_t i;
+	const uint8_t *src;
+	uint8_t *dst;
+
+	/* See if it copies from the VCD_TARGET/VCD_SOURCE window or the target window.
+	 * Out-of-bounds checks for the addresses and sizes are performed in
+	 * xd3_decode_parse_halfinst. */
+	if (inst->addr < stream->dec_cpylen)
+	  {
+	    if (stream->dec_win_ind & VCD_TARGET)
+	      {
+		/* For VCD_TARGET we know the entire range is in-memory, as established by
+		 * decode_setup_buffers. */
+		src = stream->dec_cpyaddrbase + inst->addr;
+		inst->type = XD3_NOOP;
+		inst->size = 0;
+	      }
+	    else
+	      {
+		/* In this case we have to read a source block, which could return control
+		 * to the caller.  We need to know the first block number needed for this
+		 * copy. */
+		xd3_source *source;
+		xoff_t block;
+		usize_t blkoff;
+		usize_t blksize;
+		int ret;
+
+		/* Re-entered via goto for each further source block of the same copy;
+		 * inst->addr and inst->size then describe what is still to be copied. */
+	      more:
+
+		source  = stream->src;
+		block   = source->cpyoff_blocks;
+		blkoff  = source->cpyoff_blkoff + inst->addr;
+		blksize = source->blksize;
+
+		/* Normalize to a block number and an offset within that block. */
+ 		while (blkoff >= blksize)
+		  {
+		    block  += 1;
+		    blkoff -= blksize;
+		  }
+
+		if ((ret = xd3_getblk (stream, block)))
+		  {
+		    /* could be a XD3_GETSRCBLK failure. */
+		    return ret;
+		  }
+
+		src = source->curblk + blkoff;
+
+		/* This block either contains enough data or the source file is
+		 * short. */
+		if ((source->onblk != blksize) && (blkoff + take > source->onblk))
+		  {
+		    stream->msg = "source file too short";
+		    return EINVAL;
+
+		  }
+
+		XD3_ASSERT (blkoff != blksize);
+
+		if (blkoff + take <= blksize)
+		  {
+		    inst->type = XD3_NOOP;
+		    inst->size = 0;
+		  }
+		else
+		  {
+		    /* This block doesn't contain all the data, modify the instruction, do
+		     * not set to XD3_NOOP. */
+		    take = blksize - blkoff;
+		    inst->size -= take;
+		    inst->addr += take;
+		  }
+	      }
+	  }
+	else
+	  {
+	    /* For a target-window copy, we know the entire range is in-memory.  The
+	     * dec_tgtaddrbase is negatively offset by dec_cpylen because the addresses
+	     * start beyond that point. */
+	    src = stream->dec_tgtaddrbase + inst->addr;
+	    inst->type = XD3_NOOP;
+	    inst->size = 0;
+	  }
+
+ 	dst = stream->next_out + stream->avail_out;
+
+	stream->avail_out += take;
+
+	/* Can't just memcpy here due to possible overlap. */
+	for (i = take; i != 0; i -= 1)
+	  {
+	    *dst++ = *src++;
+	  }
+
+	take = inst->size;
+
+	/* If there is more to copy, call getblk again. */
+	if (inst->type != XD3_NOOP)
+	  {
+	    XD3_ASSERT (take > 0);
+	    goto more;
+	  }
+	else
+	  {
+	    XD3_ASSERT (take == 0);
+	  }
+      }
+    }
+
+  return 0;
+}
+
+/* Mark the current window as finished: rewind the three section cursors, clear the
+ * count of window bytes consumed, and move the decoder to DEC_FINISH.  Always returns
+ * XD3_OUTPUT. */
+static int
+xd3_decode_finish_window (xd3_stream *stream)
+{
+  stream->addr_sect.pos = 0;
+  stream->inst_sect.pos = 0;
+  stream->data_sect.pos = 0;
+
+  stream->dec_winbytes = 0;
+  stream->dec_state    = DEC_FINISH;
+
+  return XD3_OUTPUT;
+}
+
+/* Read the DATA, INST and ADDR sections of the current window, resuming at whichever
+ * section stream->dec_state names.  Handles the XD3_JUST_HDR, XD3_SKIP_WINDOW and
+ * XD3_SKIP_EMIT flags by finishing the window early (returning XD3_OUTPUT).  Returns
+ * XD3_INPUT when more input is required, 0 when the sections are complete and the output
+ * buffers have been set up, or an error code. */
+static int
+xd3_decode_sections (xd3_stream *stream)
+{
+  usize_t total, remaining, consume;
+  int must_copy, ret;
+
+  /* Header-only decoding has nothing left to do. */
+  if (stream->flags & XD3_JUST_HDR)
+    {
+      return xd3_decode_finish_window (stream);
+    }
+
+  /* Combined size of the three sections: with this much input available, no copying
+   * is needed. */
+  total = (stream->data_sect.size +
+	   stream->inst_sect.size +
+	   stream->addr_sect.size);
+
+  /* The window may be entirely processed. */
+  XD3_ASSERT (stream->dec_winbytes <= total);
+
+  /* Bytes still missing, and how many of them the current input can supply. */
+  remaining = total - stream->dec_winbytes;
+  consume   = min (remaining, stream->avail_in);
+
+  /* A copy is required unless the rest of the window is completely available. */
+  must_copy = (consume != remaining);
+
+  if (stream->flags & XD3_SKIP_WINDOW)
+    {
+      /* The window is skipped: discard the available input. */
+      DECODE_INPUT (consume);
+
+      stream->dec_winbytes += consume;
+
+      if (! must_copy)
+	{
+	  return xd3_decode_finish_window (stream);
+	}
+
+      stream->msg = "further input required";
+      return XD3_INPUT;
+    }
+
+  /* Read the sections in order; each case deliberately falls into the next. */
+  switch (stream->dec_state)
+    {
+    default:
+      stream->msg = "internal error";
+      return EINVAL;
+
+    case DEC_DATA:
+      ret = xd3_decode_section (stream, & stream->data_sect, DEC_INST, must_copy);
+      if (ret != 0) { return ret; }
+      /* fall through */
+    case DEC_INST:
+      ret = xd3_decode_section (stream, & stream->inst_sect, DEC_ADDR, must_copy);
+      if (ret != 0) { return ret; }
+      /* fall through */
+    case DEC_ADDR:
+      ret = xd3_decode_section (stream, & stream->addr_sect, DEC_EMIT, must_copy);
+      if (ret != 0) { return ret; }
+    }
+
+  XD3_ASSERT (stream->dec_winbytes == total);
+
+#if SECONDARY_ANY
+#define DECODE_SECONDARY_SECTION(UPPER,LOWER) \
+  ((stream->dec_del_ind & VCD_ ## UPPER ## COMP) && \
+   (ret = xd3_decode_secondary (stream, & stream-> LOWER ## _sect, \
+					& xd3_sec_ ## LOWER (stream))))
+
+  /* Run the secondary decoder on each section flagged in the delta indicator. */
+  if (DECODE_SECONDARY_SECTION (DATA, data) ||
+      DECODE_SECONDARY_SECTION (INST, inst) ||
+      DECODE_SECONDARY_SECTION (ADDR, addr))
+    {
+      return ret;
+    }
+#endif
+
+  if (stream->flags & XD3_SKIP_EMIT)
+    {
+      return xd3_decode_finish_window (stream);
+    }
+
+  /* OPT: A possible optimization is to avoid allocating memory in decode_setup_buffers
+   * and to avoid a large memcpy when the window consists of a single VCD_SOURCE copy
+   * instruction.  The only potential problem is if the following window is a VCD_TARGET,
+   * then you need to remember... */
+  return xd3_decode_setup_buffers (stream);
+}
+
+/* Execute the instructions of the current window, writing the target window at
+ * stream->next_out and counting it in stream->avail_out.  Afterwards verifies the window
+ * length, that the data and address sections were fully consumed, and (unless disabled)
+ * the Adler-32 checksum.  Returns XD3_OUTPUT via xd3_decode_finish_window on success,
+ * EINVAL on malformed input, or whatever the instruction helpers return (e.g. when
+ * xd3_getblk hands control back for a source block). */
+static int
+xd3_decode_emit (xd3_stream *stream)
+{
+  int ret;
+
+  /* Produce output: originally structured to allow reentrant code that fills as much of
+   * the output buffer as possible, but VCDIFF semantics allows to copy from anywhere from
+   * the target window, so instead allocate a sufficiently sized buffer after the target
+   * window length is decoded.
+   *
+   * This code still needs to be reentrant to allow XD3_GETSRCBLK to return control.  This
+   * is handled by setting the stream->dec_currentN instruction types to XD3_NOOP after
+   * they have been processed. */
+  XD3_ASSERT (! (stream->flags & XD3_SKIP_EMIT));
+  XD3_ASSERT (stream->dec_tgtlen <= stream->space_out);
+
+  /* Output may already be pending only when resuming an interrupted half-instruction. */
+  XD3_ASSERT (stream->avail_out == 0 ||
+	      stream->dec_current1.type != XD3_NOOP ||
+	      stream->dec_current2.type != XD3_NOOP);
+
+  /* Keep going while a half-instruction is still pending, even if the instruction
+   * section is exhausted: otherwise a half-instruction interrupted by xd3_getblk after
+   * the last opcode was fetched would never be completed on re-entry. */
+  while ((stream->inst_sect.buf != stream->inst_sect.buf_max) ||
+	 (stream->dec_current1.type != XD3_NOOP) ||
+	 (stream->dec_current2.type != XD3_NOOP))
+    {
+      /* Decode next instruction pair. */
+      if ((stream->dec_current1.type == XD3_NOOP) &&
+	  (stream->dec_current2.type == XD3_NOOP) &&
+	  (ret = xd3_decode_instruction (stream))) { return ret; }
+
+      /* Output for each instruction. */
+      if ((stream->dec_current1.type != XD3_NOOP) &&
+	  (ret = xd3_decode_output_halfinst (stream, & stream->dec_current1))) { return ret; }
+
+      if ((stream->dec_current2.type != XD3_NOOP) &&
+	  (ret = xd3_decode_output_halfinst (stream, & stream->dec_current2))) { return ret; }
+    }
+
+  if (stream->avail_out != stream->dec_tgtlen)
+    {
+      IF_DEBUG1 (P(RINT "AVAIL_OUT(%d) != DEC_TGTLEN(%d)\n", stream->avail_out, stream->dec_tgtlen));
+      stream->msg = "wrong window length";
+      return EINVAL;
+    }
+
+  if (stream->data_sect.buf != stream->data_sect.buf_max)
+    {
+      stream->msg = "extra data section";
+      return EINVAL;
+    }
+
+  if (stream->addr_sect.buf != stream->addr_sect.buf_max)
+    {
+      stream->msg = "extra address section";
+      return EINVAL;
+    }
+
+  /* OPT: Should cksum computation be combined with the above loop? */
+  if ((stream->dec_win_ind & VCD_ADLER32) != 0 &&
+      (stream->flags & XD3_ADLER32_NOVER) == 0)
+    {
+      uint32_t a32 = adler32 (1L, stream->next_out, stream->avail_out);
+
+      if (a32 != stream->dec_adler32)
+	{
+	  stream->msg = "target window checksum mismatch";
+	  return EINVAL;
+	}
+    }
+
+  /* Finished with a window. */
+  return xd3_decode_finish_window (stream);
+}
+
+/* Decoder entry point: a state machine driven by stream->dec_state.  Each case consumes
+ * one element of the VCDIFF file header or window header and falls through to the next;
+ * when a helper returns non-zero (for example because more input is needed) that value
+ * is returned and the saved dec_state lets the next call resume at the same element.
+ *
+ * Returns XD3_GOTHEADER or XD3_WINSTART once a window header has been read, XD3_OUTPUT
+ * when a window has been processed, XD3_WINFINISH after the DEC_FINISH step, EINVAL or
+ * ENOMEM on failure, or any other value passed up from the helpers. */
+int
+xd3_decode_input (xd3_stream *stream)
+{
+  int ret;
+
+  if (stream->enc_state != 0)
+    {
+      stream->msg = "encoder/decoder transition";
+      return EINVAL;
+    }
+
+  /* Each *_CASE macro reads X only when EXPR holds, returns early if the read does not
+   * succeed, and then unconditionally advances dec_state to NSTATE. */
+#define BYTE_CASE(expr,x,nstate)                                               \
+      do {                                                                     \
+      if ( (expr) &&                                                           \
+           ((ret = xd3_decode_byte (stream, & (x))) != 0) ) { return ret; }    \
+      stream->dec_state = (nstate);                                            \
+      } while (0)
+
+#define OFFSET_CASE(expr,x,nstate)                                             \
+      do {                                                                     \
+      if ( (expr) &&                                                           \
+           ((ret = xd3_decode_offset (stream, & (x))) != 0) ) { return ret; }  \
+      stream->dec_state = (nstate);                                            \
+      } while (0)
+
+#define SIZE_CASE(expr,x,nstate)                                               \
+      do {                                                                     \
+      if ( (expr) &&                                                           \
+           ((ret = xd3_decode_size (stream, & (x))) != 0) ) { return ret; }    \
+      stream->dec_state = (nstate);                                            \
+      } while (0)
+
+#define SRCORTGT(x) (((x) & VCD_SRCORTGT) == VCD_SOURCE ||                     \
+		     ((x) & VCD_SRCORTGT) == VCD_TARGET)
+
+  switch (stream->dec_state)
+    {
+    case DEC_VCHEAD:
+      {
+	if ((ret = xd3_decode_bytes (stream, stream->dec_magic, & stream->dec_magicbytes, 4))) { return ret; }
+
+	if (stream->dec_magic[0] != VCDIFF_MAGIC1 ||
+	    stream->dec_magic[1] != VCDIFF_MAGIC2 ||
+	    stream->dec_magic[2] != VCDIFF_MAGIC3)
+	  {
+	    stream->msg = "not a VCDIFF input";
+	    return EINVAL;
+	  }
+
+	if (stream->dec_magic[3] != 0)
+	  {
+	    stream->msg = "VCDIFF input version > 0 is not supported";
+	    return EINVAL;
+	  }
+
+	stream->dec_state = DEC_HDRIND;
+      }
+    case DEC_HDRIND:
+      {
+	if ((ret = xd3_decode_byte (stream, & stream->dec_hdr_ind))) { return ret; }
+
+	if ((stream->dec_hdr_ind & VCD_INVHDR) != 0)
+	  {
+	    stream->msg = "unrecognized header indicator bits set";
+	    return EINVAL;
+	  }
+
+	stream->dec_state = DEC_SECONDID;
+      }
+
+    case DEC_SECONDID:
+      /* Secondary compressor ID: only if VCD_SECONDARY is set */
+      if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0)
+	{
+	  BYTE_CASE (1, stream->dec_secondid, DEC_TABLEN);
+
+	  switch (stream->dec_secondid)
+	    {
+	    case VCD_FGK_ID:
+	      FGK_CASE (stream);
+	    case VCD_DJW_ID:
+	      DJW_CASE (stream);
+	    default:
+	      stream->msg = "unknown secondary compressor ID";
+	      return EINVAL;
+	    }
+	}
+
+    case DEC_TABLEN:
+      /* Length of code table data: only if VCD_CODETABLE is set */
+      SIZE_CASE ((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->dec_codetblsz, DEC_NEAR);
+
+      /* The codetblsz counts the two NEAR/SAME bytes */
+      if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) {
+	if (stream->dec_codetblsz <= 2) {
+	  /* Malformed input, not an allocation failure. */
+	  stream->msg = "invalid code table size";
+	  return EINVAL;
+	}
+	stream->dec_codetblsz -= 2;
+      }
+    case DEC_NEAR:
+      /* Near modes: only if VCD_CODETABLE is set */
+      BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_near, DEC_SAME);
+    case DEC_SAME:
+      /* Same modes: only if VCD_CODETABLE is set */
+      BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0, stream->acache.s_same, DEC_TABDAT);
+    case DEC_TABDAT:
+      /* Compressed code table data */
+
+      if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0)
+	{
+	  /* Get the code table data. */
+	  if ((stream->dec_codetbl == NULL) &&
+	      (stream->dec_codetbl = xd3_alloc (stream, stream->dec_codetblsz, 1)) == NULL) { return ENOMEM; }
+
+	  if ((ret = xd3_decode_bytes (stream, stream->dec_codetbl, & stream->dec_codetblbytes, stream->dec_codetblsz)))
+	    {
+	      return ret;
+	    }
+
+	  if ((ret = xd3_apply_table_encoding (stream, stream->dec_codetbl, stream->dec_codetblbytes)))
+	    {
+	      return ret;
+	    }
+	}
+      else
+	{
+	  /* Use the default table. */
+	  stream->acache.s_near = __rfc3284_code_table_desc.near_modes;
+	  stream->acache.s_same = __rfc3284_code_table_desc.same_modes;
+	  stream->code_table    = xd3_rfc3284_code_table ();
+	}
+
+      if ((ret = xd3_alloc_cache (stream))) { return ret; }
+
+      stream->dec_state = DEC_APPLEN;
+
+    case DEC_APPLEN:
+      /* Length of application data */
+      SIZE_CASE((stream->dec_hdr_ind & VCD_APPHEADER) != 0, stream->dec_appheadsz, DEC_APPDAT);
+
+    case DEC_APPDAT:
+      /* Application data */
+      if (stream->dec_hdr_ind & VCD_APPHEADER)
+	{
+	  /* The padding byte below needs dec_appheadsz + 1 bytes; reject a size for which
+	   * that sum would wrap around to zero. */
+	  if (stream->dec_appheadsz == USIZE_T_MAX)
+	    {
+	      stream->msg = "application header too large";
+	      return EINVAL;
+	    }
+
+	  /* Note: we add an additional byte for padding, to allow 0-termination. */
+	  if ((stream->dec_appheader == NULL) &&
+	      (stream->dec_appheader = xd3_alloc (stream, stream->dec_appheadsz+1, 1)) == NULL) { return ENOMEM; }
+
+	  stream->dec_appheader[stream->dec_appheadsz] = 0;
+
+	  if ((ret = xd3_decode_bytes (stream, stream->dec_appheader, & stream->dec_appheadbytes, stream->dec_appheadsz)))
+	    {
+	      return ret;
+	    }
+	}
+
+      stream->dec_hdrsize = stream->total_in;
+      stream->dec_state = DEC_WININD;
+
+    case DEC_WININD:
+      {
+	/* Start of a window: the window indicator */
+
+	if ((ret = xd3_decode_byte (stream, & stream->dec_win_ind))) { return ret; }
+
+	stream->current_window = stream->dec_window_count;
+
+	if (XOFF_T_OVERFLOW (stream->dec_winstart, stream->dec_tgtlen))
+	  {
+	    stream->msg = "decoder file offset overflow";
+	    return EINVAL;
+	  }
+
+	/* dec_tgtlen still holds the previous window's length at this point. */
+	stream->dec_winstart += stream->dec_tgtlen;
+
+	if ((stream->dec_win_ind & VCD_INVWIN) != 0)
+	  {
+	    stream->msg = "unrecognized window indicator bits set";
+	    return EINVAL;
+	  }
+
+	if ((ret = xd3_decode_init_window (stream))) { return ret; }
+
+	stream->dec_state = DEC_CPYLEN;
+
+	IF_DEBUG1 (P(RINT "--------- TARGET WINDOW %"Q"u ------------------\n", stream->current_window));
+      }
+
+    case DEC_CPYLEN:
+      /* Copy window length: only if VCD_SOURCE or VCD_TARGET is set */
+      SIZE_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpylen, DEC_CPYOFF);
+
+      /* Set the initial, logical decoder position (HERE address) in dec_position.  This
+       * is set to just after the source/copy window, as we are just about to output the
+       * first byte of target window. */
+      stream->dec_position = stream->dec_cpylen;
+
+    case DEC_CPYOFF:
+      /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */
+      OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff, DEC_ENCLEN);
+
+      /* Copy offset and copy length may not overflow. */
+      if (XOFF_T_OVERFLOW (stream->dec_cpyoff, stream->dec_cpylen))
+	{
+	  stream->msg = "decoder copy window overflows a file offset";
+	  return EINVAL;
+	}
+
+      /* Check copy window bounds: VCD_TARGET window may not exceed current position. */
+      if ((stream->dec_win_ind & VCD_TARGET) &&
+	  (stream->dec_cpyoff + (xoff_t) stream->dec_cpylen > stream->dec_winstart))
+	{
+	  stream->msg = "VCD_TARGET window out of bounds";
+	  return EINVAL;
+	}
+
+    case DEC_ENCLEN:
+      /* Length of the delta encoding */
+      SIZE_CASE(1, stream->dec_enclen, DEC_TGTLEN);
+    case DEC_TGTLEN:
+      /* Length of target window */
+      SIZE_CASE(1, stream->dec_tgtlen, DEC_DELIND);
+
+      /* Set the maximum decoder position, beyond which we should not decode any data.
+       * This is the maximum value for dec_position.  This may not exceed the size of a
+       * usize_t. */
+      if (USIZE_T_OVERFLOW (stream->dec_cpylen, stream->dec_tgtlen))
+	{
+	  stream->msg = "decoder target window overflows a usize_t";
+	  return EINVAL;
+	}
+
+      /* Check for malicious files. */
+      if (stream->dec_tgtlen > XD3_HARDMAXWINSIZE)
+	{
+	  stream->msg = "hard window size exceeded";
+	  return EINVAL;
+	}
+
+      stream->dec_maxpos = stream->dec_cpylen + stream->dec_tgtlen;
+
+    case DEC_DELIND:
+      /* Delta indicator */
+      BYTE_CASE(1, stream->dec_del_ind, DEC_DATALEN);
+
+      if ((stream->dec_del_ind & VCD_INVDEL) != 0)
+	{
+	  stream->msg = "unrecognized delta indicator bits set";
+	  return EINVAL;
+	}
+
+      /* Delta indicator is only used with secondary compression. */
+      if ((stream->dec_del_ind != 0) && (stream->sec_type == NULL))
+	{
+	  stream->msg = "invalid delta indicator bits set";
+	  return EINVAL;
+	}
+
+      /* Section lengths */
+    case DEC_DATALEN:
+      SIZE_CASE(1, stream->data_sect.size, DEC_INSTLEN);
+    case DEC_INSTLEN:
+      SIZE_CASE(1, stream->inst_sect.size, DEC_ADDRLEN);
+    case DEC_ADDRLEN:
+      SIZE_CASE(1, stream->addr_sect.size, DEC_CKSUM);
+
+    case DEC_CKSUM:
+      /* Window checksum. */
+      if ((stream->dec_win_ind & VCD_ADLER32) != 0)
+	{
+	  int i;
+
+	  if ((ret = xd3_decode_bytes (stream, stream->dec_cksum, & stream->dec_cksumbytes, 4))) { return ret; }
+
+	  /* Assemble the four checksum bytes, most significant byte first. */
+	  for (i = 0; i < 4; i += 1)
+	    {
+	      stream->dec_adler32 = (stream->dec_adler32 << 8) | stream->dec_cksum[i];
+	    }
+	}
+
+      stream->dec_state = DEC_DATA;
+
+      /* Check dec_enclen for redundency, otherwise it is not really used. */
+      {
+	usize_t enclen_check = (1 + (xd3_sizeof_size (stream->dec_tgtlen) +
+				    xd3_sizeof_size (stream->data_sect.size) +
+				    xd3_sizeof_size (stream->inst_sect.size) +
+				    xd3_sizeof_size (stream->addr_sect.size)) +
+			       stream->data_sect.size +
+			       stream->inst_sect.size +
+			       stream->addr_sect.size +
+			       ((stream->dec_win_ind & VCD_ADLER32) ? 4 : 0));
+
+	if (stream->dec_enclen != enclen_check)
+	  {
+	    stream->msg = "incorrect encoding length (redundent)";
+	    return EINVAL;
+	  }
+      }
+
+      /* Returning here gives the application a chance to inspect the header, skip the
+       * window, etc. */
+      if (stream->current_window == 0) { return XD3_GOTHEADER; }
+      else                             { return XD3_WINSTART; }
+
+    case DEC_DATA:
+    case DEC_INST:
+    case DEC_ADDR:
+      /* Next read the three sections. */
+     if ((ret = xd3_decode_sections (stream))) { return ret; }
+
+    case DEC_EMIT:
+
+      /* To speed VCD_SOURCE block-address calculations, the source cpyoff_blocks and
+       * cpyoff_blkoff are pre-computed. */
+      if (stream->dec_win_ind & VCD_SOURCE)
+	{
+	  xd3_source *src = stream->src;
+
+	  if (src == NULL)
+	    {
+	      stream->msg = "source input required";
+	      return EINVAL;
+	    }
+
+	  src->cpyoff_blocks = stream->dec_cpyoff / src->blksize;
+	  src->cpyoff_blkoff = stream->dec_cpyoff % src->blksize;
+	}
+
+      /* xd3_decode_emit returns XD3_OUTPUT on every success. */
+      if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT)
+	{
+	  stream->total_out += (xoff_t) stream->avail_out;
+	}
+
+      return ret;
+
+    case DEC_FINISH:
+      {
+	/* After a VCD_TARGET window, keep the output buffer as the "last window" buffer,
+	 * exchanging it with the previously kept one if there is one. */
+	if (stream->dec_win_ind & VCD_TARGET)
+	  {
+	    if (stream->dec_lastwin == NULL)
+	      {
+		stream->dec_lastwin   = stream->next_out;
+		stream->dec_lastspace = stream->space_out;
+	      }
+	    else
+	      {
+		xd3_swap_uint8p (& stream->dec_lastwin,   & stream->next_out);
+		xd3_swap_usize_t (& stream->dec_lastspace, & stream->space_out);
+	      }
+	  }
+
+	stream->dec_lastlen   = stream->dec_tgtlen;
+	stream->dec_laststart = stream->dec_winstart;
+	stream->dec_window_count += 1;
+
+	/* Note: the updates to dec_winstart & current_window are deferred until after the
+	 * next DEC_WININD byte is read. */
+	stream->dec_state = DEC_WININD;
+	return XD3_WINFINISH;
+      }
+
+    default:
+      stream->msg = "invalid state";
+      return EINVAL;
+    }
+}
+
+/******************************************************************************************
+ String matching helpers
+ ******************************************************************************************/
+
+#if XD3_ENCODER
+/* Prepare the checksum tables used by xd3_string_match().  Allocation is deferred until
+ * first use so that it can sometimes be avoided entirely (e.g., perfect matches,
+ * zero-length inputs).  Once the small table exists, later calls only clear it when
+ * stream->small_reset is set.  Returns 0 on success or ENOMEM. */
+static int
+xd3_string_match_init (xd3_stream *stream)
+{
+  const int want_small = ((stream->flags & XD3_NOCOMPRESS) == 0);
+  const int want_large = (stream->src != NULL);
+
+  if (want_small)
+    {
+      if (stream->small_table != NULL)
+	{
+	  /* Already set up: the target hash table is only cleared, once per window. */
+	  if (stream->small_reset)
+	    {
+	      memset (stream->small_table, 0, sizeof (usize_t) * stream->small_hash.size);
+	      stream->small_reset = 0;
+	    }
+
+	  return 0;
+	}
+
+      stream->small_table = xd3_alloc0 (stream, stream->small_hash.size, sizeof (usize_t));
+
+      if (stream->small_table == NULL)
+	{
+	  return ENOMEM;
+	}
+
+      /* A previous-match table is only needed when chaining is enabled. */
+      if (stream->small_chain > 1)
+	{
+	  xd3_slist *node, *end;
+
+	  stream->small_prev = xd3_alloc (stream, stream->sprevsz, sizeof (xd3_slist));
+
+	  if (stream->small_prev == NULL)
+	    {
+	      return ENOMEM;
+	    }
+
+	  /* Every entry starts out as a one-element circular list. */
+	  node = stream->small_prev;
+	  end  = stream->small_prev + stream->sprevsz;
+
+	  while (node != end)
+	    {
+	      node->next = node;
+	      node->prev = node;
+	      node += 1;
+	    }
+	}
+    }
+
+  if (want_large && stream->large_table == NULL)
+    {
+      stream->large_table = xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t));
+
+      if (stream->large_table == NULL)
+	{
+	  return ENOMEM;
+	}
+    }
+
+  return 0;
+}
+
+/* Called at every entrance to the string-match loop and each time the input position
+ * reaches the value previously returned in *next_move_point.  This function computes
+ * more source checksums to advance the source window, entering them in
+ * stream->large_table, and stores the next move point in *next_move_point.  Returns 0 on
+ * success or the non-zero value returned by xd3_getblk.
+ *
+ * NOTE(review): pos_in is not declared in this function; presumably it is meant to be
+ * stream->input_position (or is supplied by a macro elsewhere) -- confirm. */
+static int
+xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
+{
+  /* The input offset at which the source should ideally be scanned. */
+  xoff_t target_pos = stream->total_in + pos_in + stream->srcwin_size;
+
+  /* The whole source has been checksummed: nothing more to do, ever. */
+  if (stream->srcwin_cksum_pos >= stream->src->size)
+    {
+      *next_move_point = USIZE_T_MAX;
+      return 0;
+    }
+
+  /* Checksumming is already ahead of the ideal position. */
+  if (stream->srcwin_cksum_pos > target_pos)
+    {
+      *next_move_point = stream->srcwin_cksum_pos - target_pos;
+      return 0;
+    }
+
+  IF_DEBUG1 (P(RINT "[move_p1] size=%d T=%"Q"d S=%"Q"d\n", stream->srcwin_size,
+	       stream->total_in + pos_in, stream->srcwin_cksum_pos));
+
+  *next_move_point = pos_in + stream->srcwin_size;
+
+  /* At the very start, scan two windows; afterwards double the window size (up to
+   * the maximum) and add it. */
+  if (stream->srcwin_cksum_pos != 0)
+    {
+      stream->srcwin_size = min(stream->srcwin_maxsz, stream->srcwin_size * 2);
+    }
+
+  target_pos += stream->srcwin_size;
+
+  while (stream->srcwin_cksum_pos < target_pos &&
+	 stream->srcwin_cksum_pos < stream->src->size)
+    {
+      xoff_t  block_index  = stream->srcwin_cksum_pos / stream->src->blksize;
+      usize_t block_offset = stream->srcwin_cksum_pos % stream->src->blksize;
+      usize_t block_bytes  = xd3_bytes_on_srcblk (stream->src, block_index);
+      usize_t distance;
+      int ret;
+
+      if (block_offset + stream->large_look >= block_bytes)
+	{
+	  /* Next block */
+	  stream->srcwin_cksum_pos = (block_index * stream->src->blksize) + block_bytes;
+	  continue;
+	}
+
+      ret = xd3_getblk (stream, block_index);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+
+      /* Limit the scan of this block to the target position. */
+      distance    = target_pos - stream->srcwin_cksum_pos;
+      block_bytes = min(block_bytes, distance + block_offset + stream->large_look);
+
+      while (block_offset + stream->large_look <= block_bytes)
+	{
+	  uint32_t cksum = xd3_lcksum (stream->src->curblk + block_offset, stream->large_look);
+	  usize_t  slot  = xd3_checksum_hash (& stream->large_hash, cksum);
+
+	  stream->large_table[slot] = stream->srcwin_cksum_pos + HASH_CKOFFSET;
+
+	  block_offset             += stream->large_step;
+	  stream->srcwin_cksum_pos += stream->large_step;
+	  IF_DEBUG (stream->large_ckcnt += 1);
+	}
+    }
+
+  IF_DEBUG1 (P(RINT "[move_p2] size=%d T=%"Q"d S=%"Q"d next_move=%d\n", stream->srcwin_size,
+	       stream->total_in + pos_in, stream->srcwin_cksum_pos, *next_move_point));
+
+  return 0;
+}
+
+/* Decide the source window: sets the stream->src fields srcbase and srclen, and
+ * stream->taroff.  The call is delayed until these values are needed to encode a copy
+ * address, at which point the decision has to be made.  Returns 0 on success or EINVAL if
+ * the window length does not fit in a usize_t. */
+static int
+xd3_srcwin_setup (xd3_stream *stream)
+{
+  xd3_source *src = stream->src;
+  const int flushing = (stream->enc_state == ENC_FLUSH);
+  xoff_t length;
+
+  IF_DEBUG1 (P(RINT "[srcwin setup:%"Q"u] iopt buffer %s\n",
+		stream->current_window,
+		stream->enc_state < ENC_FLUSH ? "overflow" : "fit"));
+
+  /* Check the undecided state. */
+  XD3_ASSERT (src->srclen == 0 && src->srcbase == 0);
+
+  /* Avoid repeating this call. */
+  stream->srcwin_decided = 1;
+
+  /* If the stream is flushing, then the iopt buffer was able to contain the complete
+   * encoding.  If no copies were issued no source window is actually needed, and srclen
+   * is left at zero.  This prevents the VCDIFF header from including source base/len.
+   * xd3_emit_hdr checks for srclen == 0. */
+  if (! (flushing && stream->match_maxaddr == 0))
+    {
+      /* Check for overflow, srclen is usize_t - this can't happen unless
+       * XD3_DEFAULT_SRCBACK and related parameters are extreme - should use smaller
+       * windows. */
+      length = stream->match_maxaddr - stream->match_minaddr;
+
+      if (length > (xoff_t) USIZE_T_MAX)
+	{
+	  stream->msg = "source window length overflow (not 64bit)";
+	  return EINVAL;
+	}
+
+      src->srcbase = stream->match_minaddr;
+
+      if (flushing)
+	{
+	  /* No more copies can be issued, so the exact source window is known. */
+	  src->srclen = (usize_t) length;
+	}
+      else
+	{
+	  /* Otherwise, we have to make a guess.  More copies may still be issued, but
+	   * the source window base and length must be decided now. */
+	  src->srclen = max ((usize_t) length, stream->avail_in + (stream->avail_in >> 2));
+
+	  if (src->size < src->srcbase + (xoff_t) src->srclen)
+	    {
+	      /* Could reduce srcbase, as well. */
+	      src->srclen = src->size - src->srcbase;
+	    }
+	}
+
+      XD3_ASSERT (src->srclen);
+    }
+
+  IF_DEBUG1 (P(RINT "[srcwin setup:%"Q"u] base %"Q"u size %u\n",
+		      stream->current_window,
+		      src->srcbase,
+		      src->srclen));
+  /* Set the taroff.  This convenience variable is used even when stream->src == NULL. */
+  stream->taroff = src->srclen;
+  return 0;
+}
+
+/* Sets the bounding region for a newly discovered source match, prior to calling
+ * xd3_source_extend_match().  This sets the match_maxfwd, match_maxback variables.  Note:
+ * srcpos is an absolute position (xoff_t) but the match_maxfwd, match_maxback variables
+ * are usize_t.  Returns 0 if the setup succeeds, or 1 if the source position lies outside
+ * an already-decided srcbase/srclen window. */
+static int
+xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
+{
+  xd3_source *src = stream->src;
+  usize_t greedy_or_not;
+  xoff_t farthest_src;
+
+  stream->match_maxback = 0;
+  stream->match_maxfwd  = 0;
+  stream->match_back    = 0;
+  stream->match_fwd     = 0;
+
+  farthest_src = max(stream->srcwin_cksum_pos, srcpos);
+
+  XD3_ASSERT (stream->srcwin_maxsz > src->blksize);
+
+  /* This prevents the encoder from seeking back more than srcwin_maxsz.  Using
+   * srcwin_maxsz is incorrect.  TODO: Possibly a new option here, how far back to
+   * seek? */
+  if (max_in == 0 ||
+      farthest_src - srcpos > stream->srcwin_maxsz - src->blksize)
+    {
+      goto bad;  // TODO! Note: this prevents catching the TODO/bug below
+    }
+
+  /* TODO: check for boundary crossing */
+
+  /* Going backwards, the 1.5-pass algorithm allows some already-matched input to be
+   * covered by a longer source match.  The greedy algorithm does not allow this. */
+  if (stream->flags & XD3_BEGREEDY)
+    {
+      /* The greedy algorithm allows backward matching to the last matched position. */
+      greedy_or_not = xd3_iopt_last_matched (stream);
+    }
+  else
+    {
+      /* The 1.5-pass algorithm allows backward matching to go back as far as the
+       * unencoded offset, which is updated as instructions pass out of the iopt buffer.
+       * If this (default) is chosen, it means xd3_iopt_erase may be called to eliminate
+       * instructions when a covering source match is found. */
+      greedy_or_not = stream->unencoded_offset;
+    }
+
+  /* Backward target match limit. */
+  XD3_ASSERT (pos_in >= greedy_or_not);
+  stream->match_maxback = pos_in - greedy_or_not;
+
+  /* Forward target match limit. */
+  XD3_ASSERT (max_in > pos_in);
+  stream->match_maxfwd = max_in - pos_in;
+
+  /* Now we take the source position into account.  It depends whether the srclen/srcbase
+   * have been decided yet. */
+  if (stream->srcwin_decided == 0)
+    {
+      /* Unrestricted case: the match can cover the entire source, 0--src->size.  We
+       * compare the usize_t match_maxfwd/match_maxback against the xoff_t src->size/srcpos values
+       * and take the min. */
+      xoff_t srcavail;
+
+      if (srcpos < (xoff_t) stream->match_maxback)
+	{
+	  stream->match_maxback = srcpos;
+	}
+
+      srcavail = src->size - srcpos;
+      if (srcavail < (xoff_t) stream->match_maxfwd)
+	{
+	  stream->match_maxfwd = srcavail;
+	}
+
+      goto good;
+    }
+
+  /* Decided some source window. */
+  XD3_ASSERT (src->srclen > 0);
+
+  /* Restricted case: fail if the srcpos lies outside the source window */
+  if ((srcpos < src->srcbase) || (srcpos > (src->srcbase + (xoff_t) src->srclen)))
+    {
+      goto bad;
+    }
+  else
+    {
+      usize_t srcavail;
+
+      srcavail = (usize_t) (srcpos - src->srcbase);
+      if (srcavail < stream->match_maxback)
+	{
+	  stream->match_maxback = srcavail;
+	}
+
+      srcavail = (usize_t) (src->srcbase + (xoff_t) src->srclen - srcpos);
+      if (srcavail < stream->match_maxfwd)	{
+	  stream->match_maxfwd = srcavail;
+	}
+
+      goto good;
+    }
+
+ good:
+  stream->match_state  = MATCH_BACKWARD;
+  stream->match_srcpos = srcpos;
+  return 0;
+
+ bad:
+  stream->match_state  = MATCH_SEARCHING;
+  return 1;
+}
+
+/* This function expands the source match backward and forward.  It is reentrant, since
+ * xd3_getblk may return XD3_GETSRCBLK, so most variables are kept in xd3_stream.  There
+ * are two callers of this function, the string_matching routine when a checksum match is
+ * discovered, and xd3_encode_input whenever a continuing (or initial) match is suspected.
+ * The two callers do different things with the input_position, thus this function never
+ * advances it (it is only moved back by match_back when a match is taken).  If a match is
+ * taken match_fwd is left non-zero.  Returns 0, or a non-zero xd3_getblk/xd3_found_match result. */
+static int
+xd3_source_extend_match (xd3_stream *stream)
+{
+  int ret;
+  xd3_source *src = stream->src;
+  xoff_t matchoff;  /* matchoff is the current right/left-boundary of the source match being tested. */
+  usize_t streamoff; /* streamoff is the current right/left-boundary of the input match being tested. */
+  xoff_t tryblk;    /* tryblk, tryoff are the block, offset position of matchoff */
+  usize_t tryoff;
+  usize_t tryrem;    /* tryrem is the number of matchable bytes on the source block */
+
+  XD3_ASSERT (src != NULL);
+
+  /* Does it make sense to compute backward match AFTER forward match? */
+  if (stream->match_state == MATCH_BACKWARD)
+    {
+      /* Note: this code is practically duplicated below, substituting
+       * match_fwd/match_back and direction.  Consolidate? */
+      matchoff  = stream->match_srcpos - stream->match_back;
+      streamoff = pos_in - stream->match_back;
+      tryblk    = matchoff / src->blksize;
+      tryoff    = matchoff % src->blksize;
+
+      /* this loops backward over source blocks */
+      while (stream->match_back < stream->match_maxback)
+	{
+	  /* see if we're backing across a source block boundary */
+	  if (tryoff == 0)
+	    {
+	      tryoff  = src->blksize;
+	      tryblk -= 1;
+	    }
+
+	  if ((ret = xd3_getblk (stream, tryblk)))
+	    {
+	      /* could be a XD3_GETSRCBLK failure. */
+	      return ret;
+	    }
+
+	  /* OPT: This code can be optimized. */
+	  for (tryrem = min (tryoff, stream->match_maxback - stream->match_back);
+	       tryrem != 0;
+	       tryrem -= 1, stream->match_back += 1)
+	    {
+	      if (src->curblk[tryoff-1] != stream->next_in[streamoff-1])
+		{
+		  goto doneback;
+		}
+
+	      tryoff    -= 1;
+	      streamoff -= 1;
+	    }
+	}
+
+    doneback:
+      stream->match_state = MATCH_FORWARD;
+    }
+
+  XD3_ASSERT (stream->match_state == MATCH_FORWARD);
+
+  matchoff  = stream->match_srcpos + stream->match_fwd;
+  streamoff = pos_in + stream->match_fwd;
+  tryblk    = matchoff / src->blksize;
+  tryoff    = matchoff % src->blksize;
+
+  /* Note: practically the same code as backwards case above: same comments */
+  while (stream->match_fwd < stream->match_maxfwd)
+    {
+      if ((ret = xd3_getblk (stream, tryblk)))
+	{
+	  return ret;
+	}
+
+      /* There's a good speedup for doing word comparisons: see zlib. */
+      for (tryrem = min(stream->match_maxfwd - stream->match_fwd,
+			src->blksize - tryoff);
+	   tryrem != 0;
+	   tryrem -= 1, stream->match_fwd += 1)
+	{
+	  if (src->curblk[tryoff] != stream->next_in[streamoff])
+	    {
+	      goto donefwd;
+	    }
+
+	  tryoff    += 1;
+	  streamoff += 1;
+	}
+
+      if (tryoff == src->blksize)
+	{
+	  tryoff  = 0;
+	  tryblk += 1;
+	}
+    }
+
+ donefwd:
+  stream->match_state = MATCH_SEARCHING;
+
+  /* Now decide whether to take the match.  There are several ways to answer this
+   * question and this is likely the best answer.  There is currently an assertion
+   * in xd3_iopt_erase that checks whether min_match works.  This variable maintains
+   * that every match exceeds the end of the previous match.  However, it is
+   * possible that match_back allows us to find a match that goes a long way back
+   * but not enough forward.  We could try an alternate approach, which might help
+   * or it might just be extra complexity: eliminate the next match_fwd >= min_match
+   * test and call xd3_iopt_erase right away.  Erase instructions as far as it goes
+   * back, then either remember what was deleted and re-insert it, or count on the
+   * string-matching algorithm to find that match again.  I think it is more
+   * worthwhile to implement large_hash duplicates. */
+  if (stream->match_fwd < min_match)
+    {
+      stream->match_fwd = 0;
+    }
+  else
+    {
+      usize_t total  = stream->match_fwd + stream->match_back;
+      xoff_t match_end;
+
+      /* Correct the variables to remove match_back from the equation. */
+      stream->input_position -= stream->match_back;
+      stream->match_srcpos   -= stream->match_back;
+      stream->match_fwd      += stream->match_back;
+      match_end               = stream->match_srcpos + stream->match_fwd;
+
+      /* At this point we may have to erase any iopt-buffer instructions that are
+       * fully covered by a backward-extending copy. */
+      if (stream->match_back > 0)
+	{
+	  xd3_iopt_erase (stream, pos_in, total);
+	}
+
+      stream->match_back = 0;
+
+      /* Update ranges.  The first source match occurs with both values set to 0. */
+      if (stream->match_maxaddr == 0 ||
+	  stream->match_srcpos < stream->match_minaddr)
+	{
+	  stream->match_minaddr = stream->match_srcpos;
+	}
+
+      if (match_end > stream->match_maxaddr)
+	{
+	  stream->match_maxaddr = match_end;
+	}
+
+      IF_DEBUG1 ({
+	static int x = 0;
+	P(RINT "[source match:%d] <inp %"Q"u %"Q"u>  <src %"Q"u %"Q"u> (%s) [ %u bytes ]\n",
+	   x++,
+	   stream->total_in + pos_in,
+	   stream->total_in + pos_in + stream->match_fwd,
+	   stream->match_srcpos,
+	   stream->match_srcpos + stream->match_fwd,
+	   (stream->total_in + stream->input_position == stream->match_srcpos) ? "same" : "diff",
+	   stream->match_fwd);
+      });
+
+      if ((ret = xd3_found_match (stream,
+				/* decoder position */ pos_in,
+				/* length */ stream->match_fwd,
+				/* address */ stream->match_srcpos,
+				/* is_source */ 1)))
+	{
+	  return ret;
+	}
+
+      // TODO: ideally, we would update srcwin_cksum_pos to avoid computing checksums in
+      // the middle of an already-discovered long match.
+
+      /* If the match ends with the available input: */
+      if (pos_in + stream->match_fwd == max_in)
+	{
+	  /* Setup continuing match for the next window. */
+	  stream->match_state   = MATCH_TARGET;
+	  stream->match_srcpos += stream->match_fwd;
+	}
+    }
+
+  return 0;
+}
+
+/* Update the small hash.  Values in the small_table are offset by HASH_CKOFFSET (1) to
+ * distinguish empty buckets from the zero offset.  This maintains the previous linked
+ * lists.  The same routine is used both to enter a new position into the hash bucket and
+ * to re-enter an existing position, promoting it to the head of the bucket (for the same
+ * address cache). */
+static void
+xd3_scksum_insert (xd3_stream *stream, usize_t inx, usize_t scksum, usize_t pos)
+{
+  /* If we are maintaining previous links. */
+  if (stream->small_prev)
+    {
+      usize_t     last_pos = stream->small_table[inx];
+      xd3_slist *pos_list = & stream->small_prev[pos & stream->sprevmask];
+      xd3_slist *prev     = pos_list->prev;
+      xd3_slist *next     = pos_list->next;
+
+      /* Assert link structure, update pos, cksum */
+      XD3_ASSERT (prev->next == pos_list);
+      XD3_ASSERT (next->prev == pos_list);
+      pos_list->pos = pos;
+      pos_list->scksum = scksum;
+
+      /* Subtract HASH_CKOFFSET and test for a previous offset. */
+      if (last_pos-- != 0)
+	{
+	  xd3_slist *last_list = & stream->small_prev[last_pos & stream->sprevmask];
+	  xd3_slist *last_next;
+
+	  /* Verify existing entry. */
+	  SMALL_HASH_DEBUG1 (stream, stream->next_in + last_pos);
+	  SMALL_HASH_DEBUG2 (stream, stream->next_in + pos);
+
+	  /* The two positions (mod sprevsz) may have the same checksum, making the old
+	   * and new entries the same.  That is why the removal step is not before the
+	   * above if-stmt. */
+	  if (last_list != pos_list)
+	    {
+	      /* Remove current position from any list it may belong to. */
+	      next->prev = prev;
+	      prev->next = next;
+
+	      /* The ordinary case, add current position to last_list. */
+	      last_next = last_list->next;
+
+	      pos_list->next = last_next;
+	      pos_list->prev = last_list;
+
+	      last_next->prev = pos_list;
+	      last_list->next = pos_list;
+	    }
+	}
+      else
+	{
+	  /* Remove current position from any list it may belong to. */
+	  next->prev = prev;
+	  prev->next = next;
+
+	  /* Re-initialize current position. */
+	  pos_list->next = pos_list;
+	  pos_list->prev = pos_list;
+	}
+    }
+
+  /* Enter the new position into the hash bucket. */
+  stream->small_table[inx] = pos + HASH_CKOFFSET;
+}
+
+#if XD3_DEBUG
+static int
+xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0,
+		  const uint8_t *inp_max, usize_t cmp_len)
+{
+  usize_t i;  /* same type as cmp_len, so the loop test is not a signed/unsigned compare */
+
+  for (i = 0; i < cmp_len; i += 1)  /* the first cmp_len bytes must all agree */
+    {
+      XD3_ASSERT (ref0[i] == inp0[i]);
+    }
+
+  if (inp0 + cmp_len < inp_max)  /* a match that stops before inp_max must end on a mismatch */
+    {
+      XD3_ASSERT (inp0[i] != ref0[i]);
+    }
+
+  return 1;  /* always 1: the checks above are assertions, not a computed result */
+}
+#endif /* XD3_DEBUG */
+
+/* When the hash table indicates a possible small string match, it calls this routine to
+ * find the best match.  The first matching position is taken from the small_table,
+ * HASH_CKOFFSET is subtracted to get the actual position.  After checking that match, if
+ * previous linked lists are in use (because stream->small_chain > 1), previous matches
+ * are tested searching for the longest match.  If (min_match > MIN_MATCH) then a lazy match
+ * is in effect (small_lchain limits the walk).  Returns the length found, offset in *match_offset.
+ *
+ * OPT: This is by far the most expensive function.  The slowdown is in part due to the data
+ * structure it maintains, which is relatively more expensive than it needs to be (in
+ * comparison to zlib) in order to support the PROMOTE decision, which is to prefer the
+ * most recently used matching address of a certain string to aid the VCDIFF same cache.
+ *
+ * Weak reasoning? it's time to modularize this routine...?  Let's say the PROMOTE
+ * feature supported by this slow data structure contributes around 2% improvement in
+ * compressed size, is there a better code table that doesn't use the SAME address cache,
+ * for which the speedup-discount could produce a better encoding?
+ */
+static /*inline*/ usize_t
+xd3_smatch (xd3_stream *stream, usize_t base, usize_t scksum, usize_t *match_offset)
+{
+  usize_t         cmp_len;
+  usize_t         match_length = 0;
+  usize_t         chain        = (min_match == MIN_MATCH ?
+				  stream->small_chain :
+				  stream->small_lchain);
+  xd3_slist     *current      = NULL;
+  xd3_slist     *first        = NULL;
+  const uint8_t *inp_max      = stream->next_in + max_in;
+  const uint8_t *inp;
+  const uint8_t *ref;
+
+  SMALL_HASH_STATS  (usize_t search_cnt = 0);
+  SMALL_HASH_DEBUG1 (stream, stream->next_in + pos_in);
+  SMALL_HASH_STATS  (stream->sh_searches += 1);
+
+  XD3_ASSERT (min_match + pos_in <= max_in);
+
+  base -= HASH_CKOFFSET;
+
+  /* Initialize the chain. */
+  if (stream->small_prev != NULL)
+    {
+      first = current = & stream->small_prev[base & stream->sprevmask];
+
+      /* Check if current->pos is correct. */
+      if (current->pos != base) { goto done; }
+    }
+
+ again:
+
+  SMALL_HASH_STATS (search_cnt += 1);
+
+  /* For small matches, we can always go to the end-of-input because the matching position
+   * must be less than the input position. */
+  XD3_ASSERT (base < pos_in);
+
+  ref = stream->next_in + base;
+  inp = stream->next_in + pos_in;
+
+  SMALL_HASH_DEBUG2 (stream, ref);
+
+  /* Expand potential match forward. */
+  while (inp < inp_max && *inp == *ref)
+    {
+      ++inp;
+      ++ref;
+    }
+
+  cmp_len = inp - (stream->next_in + pos_in);
+
+  /* Verify correctness */
+  XD3_ASSERT (xd3_check_smatch (stream->next_in + base, stream->next_in + pos_in,
+				inp_max, cmp_len));
+
+  /* Update longest match */
+  if (cmp_len > match_length)
+    {
+      ( match_length) = cmp_len;
+      (*match_offset) = base;
+
+      /* Stop if we match the entire input or discover a long_enough match. */
+      if (inp == inp_max || cmp_len >= stream->long_enough)
+	{
+	  goto done;
+	}
+    }
+
+  /* If we have not reached the chain limit, see if there is another previous position. */
+  if (current)
+    {
+      while (--chain != 0)
+	{
+	  /* Calculate the next base offset. */
+	  current = current->prev;
+	  base    = current->pos;
+
+	  /* Skip this entry if the walk has come back around to the first position, if its
+	   * position is not before pos_in (because the lists are not re-initialized across
+	   * input windows), or if the scksum is wrong.  Skips still use up the chain limit. */
+	  if (current != first && base < pos_in)
+	    {
+	      if (current->scksum != scksum)
+		{
+		  continue;
+		}
+	      goto again;
+	    }
+	}
+    }
+
+ done:
+  SMALL_HASH_STATS (stream->sh_compares += search_cnt);
+  return match_length;
+}
+
+#if XD3_DEBUG
+static void
+xd3_verify_small_state (xd3_stream    *stream,
+			const uint8_t *inp,
+			uint32_t          x_cksum)
+{
+  /* Debug check: a small checksum recomputed at inp must equal the expected x_cksum. */
+  const uint32_t fresh = xd3_scksum (inp, stream->small_look);
+  XD3_ASSERT (fresh == x_cksum);
+}
+
+static void
+xd3_verify_large_state (xd3_stream    *stream,
+			const uint8_t *inp,
+			uint32_t          x_cksum)
+{
+  /* Debug check: a large checksum recomputed at inp must equal the expected x_cksum. */
+  const uint32_t fresh = xd3_lcksum (inp, stream->large_look);
+  XD3_ASSERT (fresh == x_cksum);
+}
+
+static void
+xd3_verify_run_state (xd3_stream    *stream,
+		      const uint8_t *inp,
+		      int            x_run_l,
+		      uint8_t        x_run_c)
+{
+  /* Debug check: re-derive the run state with xd3_comprun and compare with the caller's. */
+  const int look = stream->small_look;
+  uint8_t   fresh_c;
+  const int fresh_l = xd3_comprun (inp, look, & fresh_c);
+  XD3_ASSERT (fresh_l == 0 || fresh_c == x_run_c);
+  XD3_ASSERT (x_run_l > look || fresh_l == x_run_l);
+}
+#endif /* XD3_DEBUG */
+#endif /* XD3_ENCODER */
+
+/******************************************************************************************
+ TEMPLATE pass
+ ******************************************************************************************/
+
+#endif /* __XDELTA3_C_INLINE_PASS__ */
+#ifdef __XDELTA3_C_TEMPLATE_PASS__
+
+#if XD3_ENCODER
+
+/******************************************************************************************
+ Templates
+ ******************************************************************************************/
+
+/* Template macros: XD3_TEMPLATE(x) pastes the expansion of TEMPLATE onto x; XD3_STRINGIFY(x)
+ * quotes the expansion of x.  The template parameters appear as, e.g., SLOOK, MIN_MATCH, TRYLAZY, etc. */
+#define XD3_TEMPLATE(x)      XD3_TEMPLATE2(x,TEMPLATE)
+#define XD3_TEMPLATE2(x,n)   XD3_TEMPLATE3(x,n)
+#define XD3_TEMPLATE3(x,n)   x ## n
+#define XD3_STRINGIFY(x)     XD3_STRINGIFY2(x)
+#define XD3_STRINGIFY2(x)    #x
+
+static int XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream);
+
+static const xd3_smatcher XD3_TEMPLATE(__smatcher_) =
+{
+  XD3_STRINGIFY(TEMPLATE),
+  XD3_TEMPLATE(xd3_string_match_),
+#if SOFTCFG == 1
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#else
+  LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, SSMATCH, TRYLAZY, MAXLAZY,
+  LONGENOUGH, PROMOTE
+#endif
+};
+
+static int
+XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
+{
+  /* Scans the input from pos_in, emitting RUN, source-copy and target-copy matches; returns 0
+   * or a callee's non-zero result.  TODO config: statically compile DO_* per scan_cfg? */
+  const int      DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS);
+  const int      DO_LARGE = (stream->src != NULL);
+  const int      DO_RUN   = (1);
+
+  const uint8_t *inp;
+  uint32_t       scksum = 0;
+  uint32_t       lcksum = 0;
+  usize_t         sinx;
+  usize_t         linx;
+  uint8_t        run_c;
+  int            run_l;
+  int            ret;
+  usize_t         match_length;
+  usize_t         match_offset;  // Note: "may be unused" warnings are bogus
+  usize_t         next_move_point;
+
+  /* If there will be no compression due to settings or short input, skip it entirely. */
+  if (! (DO_SMALL || DO_LARGE || DO_RUN) || pos_in + SLOOK > max_in) { goto loopnomore; }
+
+  if ((ret = xd3_string_match_init (stream))) { return ret; }
+
+  /* The restartloop label is reached when the incremental loop state needs to be
+   * reset. */
+ restartloop:
+
+  /* If there is not enough input remaining for any kind of match, skip it. */
+  if (pos_in + SLOOK > max_in) { goto loopnomore; }
+
+  IF_DEBUG1 ({
+    static int x = 0;
+    P(RINT "[string match:%d] pos_in %u; \n",
+       x++, pos_in);
+  });
+
+  /* Now reset the incremental loop state: */
+
+  /* The min_match variable is updated to avoid matching the same lazy match over and over
+   * again.  For example, if you find a (small) match of length 9 at one position, you
+   * will likely find a match of length 8 at the next position. */
+  min_match = MIN_MATCH;
+
+  /* The current input byte. */
+  inp = stream->next_in + pos_in;
+
+  /* Small match state. */
+  if (DO_SMALL)
+    {
+      scksum = xd3_scksum (inp, SLOOK);
+    }
+
+  /* Run state. */
+  if (DO_RUN)
+    {
+      run_l = xd3_comprun (inp, SLOOK, & run_c);
+    }
+
+  /* Large match state.  We continue the loop even after not enough bytes for LLOOK
+   * remain, so always check pos_in in DO_LARGE code. */
+  if (DO_LARGE && (pos_in + LLOOK <= max_in))
+    {
+      /* Source window: next_move_point is the point that pos_in must reach before
+       * computing more source checksum. */
+      if ((ret = xd3_srcwin_move_point (stream, & next_move_point)))
+	{
+	  return ret;
+	}
+
+      lcksum = xd3_lcksum (inp, LLOOK);
+    }
+
+  /* TRYLAZYLEN: True if a certain length match should be followed by lazy search.  This
+   * checks that LEN is shorter than MAXLAZY and that there is enough leftover data to
+   * consider lazy matching.  "Enough" is set to 2 since the next match will start at the
+   * next offset, it must match two extra characters. */
+#define TRYLAZYLEN(LEN,POS,MAX) ((TRYLAZY && (LEN) < MAXLAZY) && ((POS) + (LEN) <= (MAX) - 2))
+
+  /* HANDLELAZY: This statement is called each time an instruction is emitted (three
+   * cases).  If the instruction is large enough, the loop is restarted, otherwise lazy
+   * matching may ensue. */
+#define HANDLELAZY(mlen)                                         \
+  if (TRYLAZYLEN ((mlen), pos_in, max_in))                       \
+    { min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; }   \
+  else                                                           \
+    { pos_in += (mlen); goto restartloop; }
+
+  /* Now loop over one input byte at a time until a match is found... */
+  for (;; inp += 1, pos_in += 1)
+    {
+      /* Now we try three kinds of string match in order of expense:
+       * run, large match, small match. */
+
+      /* Expand the start of a RUN.  The test for (run_l == SLOOK) avoids repeating this
+       * check when we pass through a run area performing lazy matching.  The run is only
+       * expanded once when the min_match is first reached.  If lazy matching is
+       * performed, the run_l variable will remain inconsistent until the first
+       * non-running input character is reached, at which time the run_l may then again
+       * grow to SLOOK. */
+      if (DO_RUN && run_l == SLOOK)
+	{
+	  usize_t max_len = max_in - pos_in;
+
+	  IF_DEBUG (xd3_verify_run_state (stream, inp, run_l, run_c));
+
+	  while (run_l < max_len && inp[run_l] == run_c) { run_l += 1; }
+
+	  /* Output a RUN instruction. */
+	  if (run_l >= min_match && run_l >= MIN_RUN)
+	    {
+	      if ((ret = xd3_emit_run (stream, pos_in, run_l, run_c))) { return ret; }
+
+	      HANDLELAZY (run_l);
+	    }
+	}
+
+      /* If there is enough input remaining. */
+      if (DO_LARGE && (pos_in + LLOOK <= max_in))
+	{
+	  if ((pos_in >= next_move_point) &&
+	      (ret = xd3_srcwin_move_point (stream, & next_move_point)))
+	    {
+	      return ret;
+	    }
+
+	  linx = xd3_checksum_hash (& stream->large_hash, lcksum);
+
+	  IF_DEBUG (xd3_verify_large_state (stream, inp, lcksum));
+
+	  /* Note: To handle large checksum duplicates, this code should be rearranged to
+	   * resemble the small_match case more.  But how much of the code can be truly
+	   * shared?  The main difference is the need for xd3_source_extend_match to work
+	   * outside of xd3_string_match, in the case where inputs are identical. */
+	  if (unlikely (stream->large_table[linx] != 0))
+	    {
+	      /* the match_setup will fail if the source window has been decided and the
+	       * match lies outside it.  You could consider forcing a window at this point
+	       * to permit a new source window. */
+	      if (xd3_source_match_setup (stream, stream->large_table[linx] - HASH_CKOFFSET) == 0)
+		{
+		  if ((ret = xd3_source_extend_match (stream))) { return ret; }
+
+		  /* Update stream position.  match_fwd is zero if no match. */
+		  if (stream->match_fwd > 0)
+		    {
+		      HANDLELAZY (stream->match_fwd);
+		    }
+		}
+	    }
+	}
+
+      /* Small matches. */
+      if (DO_SMALL)
+	{
+	  sinx = xd3_checksum_hash (& stream->small_hash, scksum);
+
+	  /* Verify incremental state in debugging mode. */
+	  IF_DEBUG (xd3_verify_small_state (stream, inp, scksum));
+
+	  /* Search for the longest match */
+	  if (unlikely (stream->small_table[sinx] != 0))
+	    {
+	      match_length = xd3_smatch (stream,
+					 stream->small_table[sinx],
+					 scksum,
+					 & match_offset);
+	    }
+	  else
+	    {
+	      match_length = 0;
+	    }
+
+	  /* Insert a hash for this string. */
+	  xd3_scksum_insert (stream, sinx, scksum, pos_in);
+
+	  /* Promote the previous match address to head of the hash bucket.  This is
+	   * intended to improve the same cache hit rate. */
+	  if (match_length != 0 && PROMOTE)
+	    {
+	      xd3_scksum_insert (stream, sinx, scksum, match_offset);
+	    }
+
+	  /* Maybe output a COPY instruction */
+	  if (unlikely (match_length >= min_match))
+	    {
+	      IF_DEBUG1 ({
+		static int x = 0;
+		P(RINT "[target match:%d] <inp %u %u>  <cpy %u %u> (-%u) [ %u bytes ]\n",
+		   x++,
+		   pos_in,
+		   pos_in + match_length,
+		   match_offset,
+		   match_offset + match_length,
+		   pos_in - match_offset,
+		   match_length);
+	      });
+
+	      if ((ret = xd3_found_match (stream,
+					/* decoder position */ pos_in,
+					/* length */ match_length,
+					/* address */ match_offset,
+					/* is_source */ 0))) { return ret; }
+
+	      /* SSMATCH option: search small matches: continue the incremental checksum
+	       * through the matched material.  Only if not lazy matching.  */
+	      if (SSMATCH && !TRYLAZYLEN (match_length, pos_in, max_in))
+		{
+		  usize_t avail = max_in - SLOOK - pos_in;
+		  usize_t ml_m1 = match_length - 1;
+		  usize_t right;
+		  int    aincr;
+
+		  IF_DEBUG (usize_t nposi = pos_in + match_length);
+
+		  /* Avail is the last offset we can compute an incremental cksum.  If the
+		   * match length exceeds that offset then we are finished performing
+		   * incremental updates after this step.  */
+		  if (ml_m1 < avail)
+		    {
+		      right = ml_m1;
+		      aincr = 1;
+		    }
+		  else
+		    {
+		      right = avail;
+		      aincr = 0;
+		    }
+
+		  /* Compute incremental checksums within the match. */
+		  while (right > 0)
+		    {
+		      SMALL_CKSUM_UPDATE (scksum, inp, SLOOK);
+		      if (DO_LARGE && (pos_in + LLOOK < max_in)) {
+			LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK);
+		      }
+
+		      inp    += 1;
+		      pos_in += 1;
+		      right  -= 1;
+		      sinx = xd3_checksum_hash (& stream->small_hash, scksum);
+
+		      IF_DEBUG (xd3_verify_small_state (stream, inp, scksum));
+
+		      xd3_scksum_insert (stream, sinx, scksum, pos_in);
+		    }
+
+		  if (aincr)
+		    {
+		      /* Keep searching... */
+		      if (DO_RUN) { run_l = xd3_comprun (inp+1, SLOOK-1, & run_c); }
+		      XD3_ASSERT (nposi == pos_in + 1);
+		      XD3_ASSERT (pos_in + SLOOK < max_in);
+		      min_match = MIN_MATCH;
+		      goto updatesure;
+		    }
+		  else
+		    {
+		      /* Not enough input for another match. */
+		      XD3_ASSERT (pos_in + SLOOK >= max_in);
+		      goto loopnomore;
+		    }
+		}
+
+	      /* Else case: copy instruction, but no SSMATCH. */
+	      HANDLELAZY (match_length);
+	    }
+	}
+
+      /* The logic above prevents excess work during lazy matching by increasing min_match
+       * to avoid smaller matches.  Each time we advance pos_in by one, the minimum match
+       * shortens as well.  */
+      if (min_match > MIN_MATCH)
+	{
+	  min_match -= 1;
+	}
+
+    updateone:
+
+      /* See if there are no more incremental cksums to compute. */
+      if (pos_in + SLOOK == max_in)
+	{
+	  goto loopnomore;
+	}
+
+    updatesure:
+
+      /* Compute next RUN, CKSUM */
+      if (DO_RUN)   { NEXTRUN (inp[SLOOK]); }
+      if (DO_SMALL) { SMALL_CKSUM_UPDATE (scksum, inp, SLOOK); }
+      if (DO_LARGE && (pos_in + LLOOK < max_in)) { LARGE_CKSUM_UPDATE (lcksum, inp, LLOOK); }
+    }
+
+ loopnomore:
+  return 0;
+}
+#endif /* XD3_ENCODER */
+#endif /* __XDELTA3_C_TEMPLATE_PASS__ */
diff --git a/xdelta3/xdelta3.h b/xdelta3/xdelta3.h
new file mode 100755
index 0000000..a35c9b0
--- /dev/null
+++ b/xdelta3/xdelta3.h
@@ -0,0 +1,1029 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2001, 2003, 2004, 2005, 2006.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* Welcome to Xdelta.  If you want to know more about Xdelta, start by reading xdelta3.c.
+ * If you are ready to use the API, continue reading here.  There are two interfaces --
+ * xd3_encode_input and xd3_decode_input -- plus a dozen or so related calls.  This
+ * interface is styled after Zlib. */
+
+#ifndef _XDELTA3_H_
+#define _XDELTA3_H_
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+/**********************************************************************/
+
+/* Default configured value of stream->winsize.  If the program supplies
+ * xd3_encode_input() with data smaller than winsize the stream will
+ * automatically buffer the input, otherwise the input buffer is used directly.
+ */
+#ifndef XD3_DEFAULT_WINSIZE
+#define XD3_DEFAULT_WINSIZE (1U << 18)
+#endif
+
+/* The source block size.
+ */
+#ifndef XD3_DEFAULT_SRCBLKSZ
+#define XD3_DEFAULT_SRCBLKSZ (1U << 18)
+#endif
+
+/* The source window starts with only a few checksums, then doubles up to
+ * XD3_DEFAULT_MAX_CKSUM_ADVANCE. */
+#ifndef XD3_DEFAULT_START_CKSUM_ADVANCE
+#define XD3_DEFAULT_START_CKSUM_ADVANCE 1024
+#endif
+
+/* TODO: There is no command-line flag to set this value. */
+#ifndef XD3_DEFAULT_MAX_CKSUM_ADVANCE
+#define XD3_DEFAULT_MAX_CKSUM_ADVANCE (1U << 23)
+#endif
+
+/* Default total size of the source window used in xdelta3-main.h */
+#ifndef XD3_DEFAULT_SRCWINSZ
+#define XD3_DEFAULT_SRCWINSZ (1U << 23)
+#endif
+
+/* Default configured value of stream->memsize.  This dictates how much memory Xdelta will
+ * use for string-matching data structures. */
+#ifndef XD3_DEFAULT_MEMSIZE
+#define XD3_DEFAULT_MEMSIZE (1U << 18)
+#endif
+
+/* When Xdelta requests a memory allocation for certain buffers, it rounds up to units of
+ * at least this size.  The code assumes (and asserts) that this is a power-of-two. */
+#ifndef XD3_ALLOCSIZE
+#define XD3_ALLOCSIZE (1U<<13)
+#endif
+
+/* The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect decoders against
+ * malicious files.  The decoder will never decode a window larger than this.  If the file
+ * specifies VCD_TARGET the decoder may require two buffers of this size.  Rationale for
+ * choosing 22-bits as a maximum: this means that in the worst case, any VCDIFF address
+ * without a copy window will require 3 bytes to encode (7 bits per byte, HERE and SAME
+ * modes making every address within half the window away). */
+#ifndef XD3_HARDMAXWINSIZE
+#define XD3_HARDMAXWINSIZE (1U<<23)
+#endif
+
+/* The XD3_NODECOMPRESSSIZE parameter tells the xdelta main routine not to try to
+ * externally-decompress source inputs that are too large.  Since these files must be
+ * seekable, they are decompressed to a temporary file location and the user may not wish
+ * for this. */
+#ifndef XD3_NODECOMPRESSSIZE
+#define XD3_NODECOMPRESSSIZE (1U<<24)
+#endif
+
+/* The IOPT_SIZE value sets the size of a buffer used to batch overlapping copy
+ * instructions before they are optimized by picking the best non-overlapping ranges.  The
+ * larger this buffer, the longer a forced xd3_srcwin_setup() decision is held off. */
+#ifndef XD3_DEFAULT_IOPT_SIZE
+#define XD3_DEFAULT_IOPT_SIZE    128
+#endif
+
+/* The maximum distance backward to search for small matches */
+#ifndef XD3_DEFAULT_SPREVSZ
+#define XD3_DEFAULT_SPREVSZ (1U << 16)
+#endif
+
+/* Sizes and addresses within VCDIFF windows are represented as usize_t.
+ *
+ * For source-file offsets and total file sizes, total input and output counts, the xoff_t
+ * type is used.  The decoder and encoder generally check for overflow of the xoff_t size,
+ * and this is tested at the 32bit boundary [xdelta3-test.h].
+ */
+#ifndef _WIN32
+typedef unsigned int    usize_t;
+typedef u_int8_t        uint8_t;   /* u_intN_t names come from <sys/types.h> */
+typedef u_int16_t       uint16_t;
+typedef u_int32_t       uint32_t;
+typedef u_int64_t       uint64_t;
+#else
+#include <windows.h>
+#define INLINE                     /* expands to nothing on Windows builds */
+typedef unsigned int   uint;
+typedef unsigned int   usize_t;
+typedef unsigned char  uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned long  uint32_t;
+typedef ULONGLONG      uint64_t;   /* ULONGLONG is declared by <windows.h> */
+#endif
+
+#define SIZEOF_USIZE_T 4
+
+#ifndef XD3_USE_LARGEFILE64
+#define XD3_USE_LARGEFILE64 1
+#endif
+
+#if XD3_USE_LARGEFILE64
+#define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops, ... ? */
+typedef uint64_t xoff_t;
+#define SIZEOF_XOFF_T 8
+#else
+typedef uint32_t xoff_t;
+#define SIZEOF_XOFF_T 4
+#endif
+
+#define USE_UINT32 (SIZEOF_USIZE_T == 4 || SIZEOF_XOFF_T == 4 || REGRESSION_TEST)
+#define USE_UINT64 (SIZEOF_USIZE_T == 8 || SIZEOF_XOFF_T == 8 || REGRESSION_TEST)
+
+/**********************************************************************/
+
+#ifndef INLINE
+#define INLINE inline
+#endif
+
+/* Whether to build the encoder, otherwise only build the decoder. */
+#ifndef XD3_ENCODER
+#define XD3_ENCODER 1
+#endif
+
+/* The code returned when main() fails, also defined in system includes. */
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 1
+#endif
+
+/* REGRESSION TEST enables the "xdelta3 test" command, which runs a series of self-tests. */
+#ifndef REGRESSION_TEST
+#define REGRESSION_TEST 0
+#endif
+
+/* XD3_DEBUG=1 enables assertions and various statistics.  Levels > 1 enable some
+ * additional output only useful during development and debugging. */
+#ifndef XD3_DEBUG
+#define XD3_DEBUG 0
+#endif
+
+#ifndef PYTHON_MODULE
+#define PYTHON_MODULE 0
+#endif
+
+/* There are three string matching functions supplied: one fast, one slow (default), and
+ * one soft-configurable.  To disable any of these, use the following definitions. */
+#ifndef XD3_BUILD_SLOW
+#define XD3_BUILD_SLOW 1
+#endif
+#ifndef XD3_BUILD_FAST
+#define XD3_BUILD_FAST 1
+#endif
+#ifndef XD3_BUILD_SOFT
+#define XD3_BUILD_SOFT 1
+#endif
+
+#if XD3_DEBUG
+#include <stdio.h>
+#endif
+
+/* XPRINT.  Debug output and VCDIFF_TOOLS functions report to stderr.  I have used an
+ * irregular style to abbreviate [fprintf(stderr, "] as [P(RINT "]. */
+#define P    fprintf
+#define RINT stderr,
+
+typedef struct _xd3_stream             xd3_stream;
+typedef struct _xd3_source             xd3_source;
+typedef struct _xd3_hash_cfg           xd3_hash_cfg;
+typedef struct _xd3_smatcher           xd3_smatcher;
+typedef struct _xd3_rinst              xd3_rinst;
+typedef struct _xd3_dinst              xd3_dinst;
+typedef struct _xd3_hinst              xd3_hinst;
+typedef struct _xd3_rpage              xd3_rpage;
+typedef struct _xd3_addr_cache         xd3_addr_cache;
+typedef struct _xd3_output             xd3_output;
+typedef struct _xd3_desect             xd3_desect;
+typedef struct _xd3_iopt_buf           xd3_iopt_buf;
+typedef struct _xd3_rlist              xd3_rlist;
+typedef struct _xd3_sec_type           xd3_sec_type;
+typedef struct _xd3_sec_cfg            xd3_sec_cfg;
+typedef struct _xd3_sec_stream         xd3_sec_stream;
+typedef struct _xd3_config             xd3_config;
+typedef struct _xd3_code_table_desc    xd3_code_table_desc;
+typedef struct _xd3_code_table_sizes   xd3_code_table_sizes;
+typedef struct _xd3_slist              xd3_slist;
+
+/* The stream configuration has three callback functions, all of which may be supplied
+ * with NULL values.  If config->getblk is provided as NULL, the stream returns
+ * XD3_GETSRCBLK. */
+
+typedef void*  (xd3_alloc_func)    (void       *opaque,
+				    usize_t      items,
+				    usize_t      size);
+typedef void   (xd3_free_func)     (void       *opaque,
+				    void       *address);
+
+typedef int    (xd3_getblk_func)   (xd3_stream *stream,
+				    xd3_source *source,
+				    xoff_t      blkno);
+
+/* These are internal functions to delay construction of encoding tables and support
+ * alternate code tables.  See the comments & code enabled by GENERIC_ENCODE_TABLES. */
+
+typedef const xd3_dinst* (xd3_code_table_func) (void);
+typedef int              (xd3_comp_table_func) (xd3_stream *stream, const uint8_t **data, usize_t *size);
+
+
+/* Some junk. */
+
+#ifndef XD3_ASSERT
+#if XD3_DEBUG
+#define XD3_ASSERT(x) \
+    do { if (! (x)) { P(RINT "%s:%d: XD3 assertion failed: %s\n", __FILE__, __LINE__, #x); \
+    abort (); } } while (0)
+#else
+#define XD3_ASSERT(x) (void)0
+#endif
+#endif
+
+#ifdef __GNUC__
+/* As seen on linux-kernel. */
+#ifndef max
+#define max(x,y) ({ \
+	const typeof(x) _x = (x);	\
+	const typeof(y) _y = (y);	\
+	(void) (&_x == &_y);		\
+	_x > _y ? _x : _y; })
+#endif
+
+#ifndef min
+#define min(x,y) ({ \
+	const typeof(x) _x = (x);	\
+	const typeof(y) _y = (y);	\
+	(void) (&_x == &_y);		\
+	_x < _y ? _x : _y; })
+#endif
+#else
+#ifndef max
+#define max(x,y) ((x) < (y) ? (y) : (x))
+#endif
+#ifndef min
+#define min(x,y) ((x) < (y) ? (x) : (y))
+#endif
+#endif
+
+/******************************************************************************************
+ PUBLIC ENUMS
+ ******************************************************************************************/
+
+/* These are the six ordinary status codes returned by the xd3_encode_input() and
+ * xd3_decode_input() state machines. */
+typedef enum {
+
+  /* An application must be prepared to handle the return values below from either
+   * xd3_encode_input or xd3_decode_input, except in the case of no-source compression, in
+   * which case XD3_GETSRCBLK is never returned.  More detailed comments for these are
+   * given in xd3_encode_input and xd3_decode_input comments, below. */
+  XD3_INPUT     = -17703, /* need input */
+  XD3_OUTPUT    = -17704, /* have output */
+  XD3_GETSRCBLK = -17705, /* need a block of source input (with no xd3_getblk function),
+			   * a chance to do non-blocking read. */
+  XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF & first window header */
+  XD3_WINSTART  = -17707, /* notification: returned before a window is processed, giving a
+			   * chance to XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that window. */
+  XD3_WINFINISH = -17708, /* notification: returned after encode/decode & output for a window */
+
+} xd3_rvalues;
+
+/* special values in config->flags */
+typedef enum
+{
+  XD3_JUST_HDR       = (1 << 1),   /* used by VCDIFF tools, see xdelta3-main.h. */
+  XD3_SKIP_WINDOW    = (1 << 2),   /* used by VCDIFF tools, see xdelta3-main.h. */
+  XD3_SKIP_EMIT      = (1 << 3),   /* used by VCDIFF tools, see xdelta3-main.h. */
+  XD3_FLUSH          = (1 << 4),   /* flush the stream buffer to prepare for xd3_stream_close(). */
+
+  XD3_SEC_DJW        = (1 << 5),   /* use DJW static huffman */
+  XD3_SEC_FGK        = (1 << 6),   /* use FGK adaptive huffman */
+  XD3_SEC_TYPE       = (XD3_SEC_DJW | XD3_SEC_FGK),
+
+  XD3_SEC_NODATA     = (1 << 7),   /* disable secondary compression of the data section. */
+  XD3_SEC_NOINST     = (1 << 8),   /* disable secondary compression of the inst section. */
+  XD3_SEC_NOADDR     = (1 << 9),   /* disable secondary compression of the addr section (which is most random). */
+
+  XD3_SEC_OTHER      = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR),
+
+  XD3_ADLER32        = (1 << 10),  /* enable checksum computation in the encoder. */
+  XD3_ADLER32_NOVER  = (1 << 11),  /* disable checksum verification in the decoder. */
+
+  XD3_ALT_CODE_TABLE = (1 << 12),  /* for testing the alternate code table encoding. */
+
+  XD3_NOCOMPRESS     = (1 << 13),  /* disable ordinary data compression feature,
+				    * only search the source, not the target. */
+  XD3_BEGREEDY       = (1 << 14),  /* disable the "1.5-pass algorithm", instead use
+				    * greedy matching.  Greedy is off by default. */
+} xd3_flags;
+
+/* The values of this enumeration are set in xd3_config using the smatch_cfg variable.  It
+ * can be set to slow, fast, soft, or default.  The fast and slow settings use preset,
+ * hardcoded parameters and the soft setting is accompanied by user-supplied parameters.
+ * If the user supplies 'default' the code selects one of the available string matchers.
+ * Due to compile-time settings (see the XD3_BUILD_SLOW, XD3_BUILD_FAST and
+ * XD3_BUILD_SOFT definitions), not all options may be available. */
+typedef enum
+{
+  XD3_SMATCH_DEFAULT = 0,
+  XD3_SMATCH_SLOW    = 1,
+  XD3_SMATCH_FAST    = 2,
+  XD3_SMATCH_SOFT    = 3,
+} xd3_smatch_cfg;
+
+/******************************************************************************************
+ PRIVATE ENUMS
+ ******************************************************************************************/
+
+/* stream->match_state is part of the xd3_encode_input state machine for source matching:
+ *
+ *  1. the XD3_GETSRCBLK block-read mechanism means reentrant matching
+ *  2. this state spans encoder windows: a match and end-of-window will continue in the next
+ *  3. the initial target byte and source byte are a presumed match, to avoid some computation
+ *  in case the inputs are identical.
+ */
+typedef enum {
+
+  MATCH_TARGET    = 0, /* in this state, attempt to match the start of the target with the
+			* previously set source address (initially 0). */
+  MATCH_BACKWARD  = 1, /* currently expanding a match backward in the source/target. */
+  MATCH_FORWARD   = 2, /* currently expanding a match forward in the source/target. */
+  MATCH_SEARCHING = 3, /* currently searching for a match. */
+
+} xd3_match_state;
+
+/* The xd3_encode_input state machine steps through these states in the following order.
+ * The matcher is reentrant and returns XD3_INPUT whenever it requires more data.  After
+ * receiving XD3_INPUT, if the application reads EOF it should call xd3_stream_close().
+ */
+typedef enum {
+
+  ENC_INIT      = 0, /* xd3_encode_input has never been called. */
+  ENC_INPUT     = 1, /* waiting for xd3_avail_input () to be called. */
+  ENC_SEARCH    = 2, /* currently searching for matches. */
+  ENC_FLUSH     = 3, /* currently emitting output. */
+  ENC_POSTOUT   = 4, /* after an output section. */
+  ENC_POSTWIN   = 5, /* after all output sections. */
+  ENC_ABORTED   = 6, /* abort. */
+} xd3_encode_state;
+
+/* The xd3_decode_input state machine steps through these states in the following order.
+ * The matcher is reentrant and returns XD3_INPUT whenever it requires more data.  After
+ * receiving XD3_INPUT, if the application reads EOF it should call xd3_stream_close().
+ *
+ * 0-8:   the VCDIFF header
+ * 9-18:  the VCDIFF window header
+ * 19-21: the three primary sections: data (which I think should have gone last), inst, addr
+ * 22:    producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK,
+ * 23:    return XD3_WINFINISH, set state=9 to decode more input
+ */
+typedef enum {
+
+  DEC_VCHEAD   = 0, /* VCDIFF header */
+  DEC_HDRIND   = 1, /* header indicator */
+
+  DEC_SECONDID = 2, /* secondary compressor ID */
+
+  DEC_TABLEN   = 3, /* code table length */
+  DEC_NEAR     = 4, /* code table near */
+  DEC_SAME     = 5, /* code table same */
+  DEC_TABDAT   = 6, /* code table data */
+
+  DEC_APPLEN   = 7, /* application data length */
+  DEC_APPDAT   = 8, /* application data */
+
+  DEC_WININD   = 9, /* window indicator */
+
+  DEC_CPYLEN   = 10, /* copy window length */
+  DEC_CPYOFF   = 11, /* copy window offset */
+
+  DEC_ENCLEN   = 12, /* length of delta encoding */
+  DEC_TGTLEN   = 13, /* length of target window */
+  DEC_DELIND   = 14, /* delta indicator */
+
+  DEC_DATALEN  = 15, /* length of ADD+RUN data */
+  DEC_INSTLEN  = 16, /* length of instruction data */
+  DEC_ADDRLEN  = 17, /* length of address data */
+
+  DEC_CKSUM    = 18, /* window checksum */
+
+  DEC_DATA     = 19, /* data section */
+  DEC_INST     = 20, /* instruction section */
+  DEC_ADDR     = 21, /* address section */
+
+  DEC_EMIT     = 22, /* producing data */
+
+  DEC_FINISH   = 23, /* window finished */
+
+  DEC_ABORTED  = 24, /* xd3_abort_stream */
+} xd3_decode_state;
+
+/* Internal codes, never seen by an application; must not alias any xd3_rvalues code. */
+typedef enum {
+  XD3_NOSECOND  = -17709, /* when secondary compression finds no improvement. */
+} xd3_pvalues;
+
+/******************************************************************************************
+ internal types
+ ******************************************************************************************/
+
+/* instruction lists used in the IOPT buffer */
+struct _xd3_rlist
+{
+  xd3_rlist  *next;
+  xd3_rlist  *prev;
+};
+
+/* the raw encoding of an instruction used in the IOPT buffer */
+struct _xd3_rinst
+{
+  uint8_t     type;
+  uint8_t     xtra;
+  uint8_t     code1;
+  uint8_t     code2;
+  usize_t      pos;
+  usize_t      size;
+  xoff_t      addr;
+  xd3_rlist   link;
+};
+
+/* the code-table form of a single- or double-instruction */
+struct _xd3_dinst
+{
+  uint8_t     type1;
+  uint8_t     size1;
+  uint8_t     type2;
+  uint8_t     size2;
+};
+
+/* the decoded form of a single (half) instruction. */
+struct _xd3_hinst
+{
+  uint8_t     type;
+  usize_t      size;
+  usize_t      addr;
+};
+
+/* used by the encoder to buffer output in sections.  list of blocks. */
+struct _xd3_output
+{
+  uint8_t    *base;
+  usize_t      next;
+  usize_t      avail;
+  xd3_output *next_page;
+};
+
+/* the VCDIFF address cache, see the RFC */
+struct _xd3_addr_cache
+{
+  uint     s_near;
+  uint     s_same;
+  usize_t  next_slot;  /* the circular index for near */
+  usize_t *near_array; /* array of size s_near        */
+  usize_t *same_array; /* array of size s_same*256    */
+};
+
+/* the IOPT buffer has a used list of (ordered) instructions, possibly overlapping in
+ * target addresses, awaiting a flush */
+struct _xd3_iopt_buf
+{
+  xd3_rlist  used;
+  xd3_rlist  free;
+  xd3_rinst *buffer;
+};
+
+/* This is the record of a pre-compiled configuration, a subset of xd3_config.  Keep them
+ * in sync!  The user never sees this structure.  Note: update XD3_SOFTCFG_VARCNT when
+ * changing. */
+struct _xd3_smatcher
+{
+  const char        *name;
+  int             (*string_match) (xd3_stream  *stream);
+  uint               large_look;
+  uint               large_step;
+  uint               small_look;
+  uint               small_chain;
+  uint               small_lchain;
+  uint               ssmatch;
+  uint               try_lazy;
+  uint               max_lazy;
+  uint               long_enough;
+  uint               promote;
+};
+
+/* hash table size & power-of-two hash function. */
+struct _xd3_hash_cfg
+{
+  usize_t           size;
+  usize_t           shift;
+  usize_t           mask;
+};
+
+/* a hash-chain link in the small match table, embedded with position and checksum */
+struct _xd3_slist
+{
+  xd3_slist *next;
+  xd3_slist *prev;
+  usize_t     pos;
+  usize_t     scksum;
+};
+
+/* a decoder section (data, inst, or addr).  there is an optimization to avoid copying
+ * these sections if all the input is available, related to the copied field below.
+ * secondary compression uses the copied2 field. */
+struct _xd3_desect
+{
+  const uint8_t *buf;
+  const uint8_t *buf_max;
+  usize_t         size;
+  usize_t         pos;
+  uint8_t       *copied1;
+  usize_t         alloc1;
+  uint8_t       *copied2;
+  usize_t         alloc2;
+};
+
+/******************************************************************************************
+ public types
+ ******************************************************************************************/
+
+/* Settings for the secondary compressor. */
+struct _xd3_sec_cfg
+{
+  int                data_type;     /* Which section. (set automatically) */
+  int                ngroups;       /* Number of DJW Huffman groups. */
+  int                sector_size;   /* Sector size. */
+  int                inefficient;   /* If true, ignore efficiency check [avoid XD3_NOSECOND]. */
+};
+
+/* This is the user-visible stream configuration. */
+struct _xd3_config
+{
+  usize_t             memsize;       /* How much memory Xdelta may allocate */
+  usize_t             winsize;       /* The encoder window size. */
+  usize_t             sprevsz;       /* How far back small string matching goes */
+  usize_t             iopt_size;     /* entries in the instruction-optimizing buffer */
+
+  usize_t             srcwin_size;   /* Initial size of the source-window lookahead */
+  usize_t             srcwin_maxsz;  /* srcwin_size grows by a factor of 2 when no matches are found */
+
+  xd3_getblk_func   *getblk;        /* The three callbacks. */
+  xd3_alloc_func    *alloc;
+  xd3_free_func     *freef;
+  void              *opaque;        /* Not used. */
+  int                flags;         /* stream->flags are initialized from xd3_config &
+				     * never modified by the library.  Use xd3_set_flags
+				     * to modify flags settings mid-stream. */
+
+  xd3_sec_cfg       sec_data;       /* Secondary compressor config: data */
+  xd3_sec_cfg       sec_inst;       /* Secondary compressor config: inst */
+  xd3_sec_cfg       sec_addr;       /* Secondary compressor config: addr */
+
+  xd3_smatch_cfg     smatch_cfg;    /* See enum: use fields below for soft config */
+  uint               large_look;    /* large string lookahead (i.e., hashed chars) */
+  uint               large_step;    /* large string interval */
+  uint               small_look;    /* small string lookahead (i.e., hashed chars) */
+  uint               small_chain;   /* small string number of previous matches to try */
+  uint               small_lchain;  /* small string number of previous matches to try, when a lazy match */
+  uint               ssmatch;       /* boolean: insert checksums for matched strings */
+  uint               try_lazy;      /* boolean: whether lazy instruction optimization is attempted */
+  uint               max_lazy;      /* size of smallest match that will disable lazy matching */
+  uint               long_enough;   /* size of smallest match long enough to discontinue string matching. */
+  uint               promote;       /* whether to promote matches in the hash chain */
+};
+
+/* The primary source file object. You create one of these objects and initialize the first
+ * four fields.  This library maintains the next six fields.  The configured getblk implementation is
+ * responsible for setting the final three fields when called (and/or when XD3_GETSRCBLK is returned).
+ */
+struct _xd3_source
+{
+  /* you set */
+  xoff_t              size;          /* size of this source */
+  usize_t             blksize;       /* block size */
+  const char         *name;          /* its name, for debug/print purposes */
+  void               *ioh;           /* opaque handle */
+
+  /* xd3 sets */
+  usize_t             srclen;        /* length of this source window */
+  xoff_t              srcbase;       /* offset of this source window in the source itself */
+  xoff_t              blocks;        /* the total number of blocks in this source */
+  usize_t             cpyoff_blocks; /* offset of copy window in blocks */
+  usize_t             cpyoff_blkoff; /* offset of copy window in blocks, remainder */
+  xoff_t              getblkno;      /* request block number: xd3 sets current getblk request */
+
+  /* getblk sets */
+  xoff_t              curblkno;      /* current block number: client sets after getblk request */
+  usize_t             onblk;         /* number of bytes on current block: client sets, xd3 verifies */
+  const uint8_t      *curblk;        /* current block array: client sets after getblk request */
+};
+
+/* The primary xd3_stream object, used for encoding and decoding.  You may access only two
+ * fields: avail_out, next_out.  Use the methods above to operate on xd3_stream. */
+struct _xd3_stream
+{
+  /* input state */
+  const uint8_t    *next_in;          /* next input byte */
+  usize_t           avail_in;         /* number of bytes available at next_in */
+  xoff_t            total_in;         /* how many bytes in */
+
+  /* output state */
+  uint8_t          *next_out;         /* next output byte */
+  usize_t           avail_out;        /* number of bytes available at next_out */
+  usize_t           space_out;        /* total out space */
+  xoff_t            current_window;   /* number of windows encoded/decoded */
+  xoff_t            total_out;        /* how many bytes out */
+
+  /* to indicate an error, xd3 sets */
+  const char       *msg;              /* last error message, NULL if no error */
+
+  /* source configuration */
+  xd3_source       *src;              /* source array */
+
+  /* encoder memory configuration */
+  usize_t           winsize;          /* suggested window size */
+  usize_t           memsize;          /* memory size parameter */
+  usize_t           sprevsz;          /* small string, previous window size (power of 2) */
+  usize_t           sprevmask;        /* small string, previous window size mask */
+  uint              iopt_size;
+
+  /* general configuration */
+  xd3_getblk_func  *getblk;           /* set nxtblk, nxtblkno to scanblkno */
+  xd3_alloc_func   *alloc;            /* malloc function */
+  xd3_free_func    *free;             /* free function */
+  void*             opaque;           /* private data object passed to alloc, free, and getblk */
+  int               flags;            /* various options */
+  int               aborted;
+  
+  /* secondary compressor configuration */
+  xd3_sec_cfg       sec_data;         /* Secondary compressor config: data */
+  xd3_sec_cfg       sec_inst;         /* Secondary compressor config: inst */
+  xd3_sec_cfg       sec_addr;         /* Secondary compressor config: addr */
+
+  /* fields common to xd3_stream_config, xd3_smatcher */
+  uint              large_look;
+  uint              large_step;
+  uint              small_look;
+  uint              small_chain;
+  uint              small_lchain;
+  uint              ssmatch;
+  uint              try_lazy;
+  uint              max_lazy;
+  uint              long_enough;
+  uint              promote;
+  uint              srcwin_size;
+  uint              srcwin_maxsz;
+  int             (*string_match) (xd3_stream  *stream);
+
+  usize_t           *large_table;      /* table of large checksums */
+  xd3_hash_cfg      large_hash;       /* large hash config */
+
+  usize_t           *small_table;      /* table of small checksums */
+  xd3_slist        *small_prev;       /* table of previous offsets, circular linked list (no sentinel) */
+  int               small_reset;      /* true if small table should be reset */
+
+  xd3_hash_cfg      small_hash;       /* small hash config */
+
+  xd3_addr_cache    acache;           /* the vcdiff address cache */
+
+  xd3_encode_state  enc_state;        /* state of the encoder */
+
+  usize_t            taroff;           /* base offset of the target input */
+  usize_t            input_position;   /* current input position */
+  usize_t            min_match;        /* current minimum match length, avoids redundant matches */
+  usize_t            unencoded_offset; /* current input, first unencoded offset. this value is <= the first
+				       * instruction's position in the iopt buffer, if there is at least one
+				       * match in the buffer. */
+
+  /* SRCWIN
+   * these variables plus srcwin_size, srcwin_maxsz above (set by config) */
+  int                srcwin_decided;    /* boolean: true if the srclen,srcbase have been decided. */
+  xoff_t             srcwin_cksum_pos;  /* Source checksum position */
+
+  /* MATCH */
+  xd3_match_state    match_state;      /* encoder match state */
+  xoff_t             match_srcpos;     /* current match source position relative to srcbase */
+  xoff_t             match_minaddr;    /* smallest matching address to set window params
+				       * (reset each window xd3_encode_reset) */
+  xoff_t             match_maxaddr;    /* largest matching address to set window params
+				       * (reset each window xd3_encode_reset) */
+  usize_t            match_back;       /* match extends back so far */
+  usize_t            match_maxback;    /* match extends back maximum */
+  usize_t            match_fwd;        /* match extends forward so far */
+  usize_t            match_maxfwd;     /* match extends forward maximum */
+
+  uint8_t          *buf_in;           /* for saving buffered input */
+  usize_t            buf_avail;        /* amount of saved input */
+  const uint8_t    *buf_leftover;     /* leftover content of next_in (i.e., user's buffer) */
+  usize_t            buf_leftavail;    /* amount of leftover content */
+
+  xd3_output       *enc_current;      /* current output buffer */
+  xd3_output       *enc_free;         /* free output buffers */
+  xd3_output       *enc_heads[4];     /* array of encoded outputs: head of chain */
+  xd3_output       *enc_tails[4];     /* array of encoded outputs: tail of chain */
+
+  xd3_iopt_buf      iopt;             /* instruction optimizing buffer */
+  xd3_rinst        *iout;             /* next single instruction */
+
+  const uint8_t    *enc_appheader;    /* application header to encode */
+  usize_t            enc_appheadsz;    /* application header size */
+
+  /* decoder stuff */
+  xd3_decode_state  dec_state;        /* current DEC_XXX value */
+  uint              dec_hdr_ind;      /* VCDIFF header indicator */
+  uint              dec_win_ind;      /* VCDIFF window indicator */
+  uint              dec_del_ind;      /* VCDIFF delta indicator */
+
+  uint8_t           dec_magic[4];     /* First four bytes */
+  usize_t            dec_magicbytes;   /* Magic position. */
+
+  uint               dec_secondid;     /* Optional secondary compressor ID. */
+
+  usize_t            dec_codetblsz;    /* Optional code table: length. */
+  uint8_t          *dec_codetbl;      /* Optional code table: storage. */
+  usize_t            dec_codetblbytes; /* Optional code table: position. */
+
+  usize_t            dec_appheadsz;    /* Optional application header: size. */
+  uint8_t          *dec_appheader;    /* Optional application header: storage */
+  usize_t            dec_appheadbytes; /* Optional application header: position. */
+
+  usize_t            dec_cksumbytes;   /* Optional checksum: position. */
+  uint8_t           dec_cksum[4];     /* Optional checksum: storage. */
+  uint32_t          dec_adler32;      /* Optional checksum: value. */
+
+  usize_t            dec_cpylen;       /* length of copy window (VCD_SOURCE or VCD_TARGET) */
+  xoff_t            dec_cpyoff;       /* offset of copy window (VCD_SOURCE or VCD_TARGET)  */
+  usize_t            dec_enclen;       /* length of delta encoding */
+  usize_t            dec_tgtlen;       /* length of target window */
+
+#if USE_UINT64
+  uint64_t          dec_64part;       /* part of a decoded uint64_t */
+#endif
+#if USE_UINT32
+  uint32_t          dec_32part;       /* part of a decoded uint32_t */
+#endif
+
+  xoff_t            dec_winstart;     /* offset of the start of current target window */
+  xoff_t            dec_window_count; /* == current_window + 1 in DEC_FINISH */
+  usize_t            dec_winbytes;     /* bytes of the three sections so far consumed */
+  usize_t            dec_hdrsize;      /* VCDIFF + app header size */
+
+  const uint8_t    *dec_tgtaddrbase;  /* Base of decoded target addresses (addr >= dec_cpylen). */
+  const uint8_t    *dec_cpyaddrbase;  /* Base of decoded copy addresses (addr < dec_cpylen). */
+
+  usize_t            dec_position;     /* current decoder position counting the cpylen offset */
+  usize_t            dec_maxpos;       /* maximum decoder position counting the cpylen offset */
+  xd3_hinst         dec_current1;     /* current instruction */
+  xd3_hinst         dec_current2;     /* current instruction */
+
+  uint8_t          *dec_buffer;       /* Decode buffer */
+  uint8_t          *dec_lastwin;      /* In case of VCD_TARGET, the last target window. */
+  usize_t            dec_lastlen;      /* length of the last target window */
+  xoff_t            dec_laststart;    /* offset of the start of last target window */
+  usize_t            dec_lastspace;    /* allocated space of last target window, for reuse */
+
+  xd3_desect        inst_sect;        /* staging area for decoding window sections */
+  xd3_desect        addr_sect;
+  xd3_desect        data_sect;
+
+  xd3_code_table_func       *code_table_func;
+  xd3_comp_table_func       *comp_table_func;
+  const xd3_dinst           *code_table;
+  const xd3_code_table_desc *code_table_desc;
+  xd3_dinst                 *code_table_alloc;
+
+  /* secondary compression */
+  const xd3_sec_type *sec_type;
+  xd3_sec_stream     *sec_stream_d;
+  xd3_sec_stream     *sec_stream_i;
+  xd3_sec_stream     *sec_stream_a;
+
+#if XD3_DEBUG
+  /* statistics */
+  usize_t            n_cpy;
+  usize_t            n_add;
+  usize_t            n_run;
+
+  usize_t            n_ibytes;
+  usize_t            n_sbytes;
+  usize_t            n_dbytes;
+
+  usize_t            l_cpy;
+  usize_t            l_add;
+  usize_t            l_run;
+
+  usize_t            sh_searches;
+  usize_t            sh_compares;
+
+  usize_t           *i_freqs;
+  usize_t           *i_modes;
+  usize_t           *i_sizes;
+
+  usize_t            large_ckcnt;
+
+  /* memory usage */
+  usize_t            alloc_cnt;
+  usize_t            free_cnt;
+
+  xoff_t            n_emit;
+#endif
+};
+
+/******************************************************************************************
+ PUBLIC FUNCTIONS
+ ******************************************************************************************/
+
+/* The two I/O disciplines, encode and decode, have similar stream semantics.  It is
+ * recommended that applications use the same code for compression and decompression -
+ * because there are only a few differences in handling encoding/decoding.
+ *
+ * See also the xd3_avail_input() and xd3_consume_output() routines, inlined below.
+ *
+ *   XD3_INPUT:  the process requires more input: call xd3_avail_input() then repeat
+ *   XD3_OUTPUT: the process has more output: read stream->next_out, stream->avail_out,
+ *               then call xd3_consume_output(), then repeat
+ *   XD3_GOTHEADER: (decoder-only) notification returned following the VCDIFF header and
+ *               first window header.  the decoder may use the header to configure itself.
+ *   XD3_WINSTART: a general notification returned once for each window except the 0-th
+ *               window, which is implied by XD3_GOTHEADER.  It is recommended to
+ *               use a switch-stmt such as:
+ *                 ...
+ *               again:
+ *                 switch ((ret = xd3_decode_input (stream))) {
+ *                    case XD3_GOTHEADER: {
+ *                      assert(stream->current_window == 0);
+ *                      stuff;
+ *                    }
+ *                    // fallthrough 
+ *                    case XD3_WINSTART: {
+ *                      something(stream->current_window);
+ *                      goto again;
+ *                    }
+ *                    ...
+ *   XD3_WINFINISH: a general notification, following the complete input & output of a
+ *               window.  at this point, stream->total_in and stream->total_out are
+ *               consistent for either encoding or decoding.
+ *   XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value is returned to
+ *               initiate a non-blocking source read.
+ *
+ * For simple usage, see the xd3_process_completely() function, which underlies
+ * xd3_encode_completely() and xd3_decode_completely() [xdelta3.c].  For real application
+ * usage, including the application header, see the command-line utility [xdelta3-main.h].
+ *
+ * main_input() implements the command-line encode and decode as well as the optional
+ * VCDIFF_TOOLS printhdr, printhdrs, and printdelta with a single loop [xdelta3-main.h].
+ */
+int     xd3_decode_input  (xd3_stream    *stream);
+int     xd3_encode_input  (xd3_stream    *stream);
+
+/* The xd3_config structure is used to initialize a stream - all data is copied into
+ * stream so config may be a temporary variable.  See the [documentation] or comments on
+ * the xd3_config structure. */
+int     xd3_config_stream (xd3_stream    *stream,
+			   xd3_config    *config);
+
+/* Since Xdelta3 doesn't open any files, xd3_close_stream is just an error check that the
+ * stream is in a proper state to be closed: this means the encoder is flushed and the
+ * decoder is at a window boundary.  The application is responsible for freeing any of the
+ * resources it supplied. */
+int     xd3_close_stream (xd3_stream    *stream);
+
+/* This unconditionally closes/frees the stream, future close() will succeed.*/
+void    xd3_abort_stream (xd3_stream    *stream);
+
+/* xd3_free_stream frees all memory allocated for the stream.  The application is
+ * responsible for freeing any of the resources it supplied. */
+void    xd3_free_stream   (xd3_stream    *stream);
+
+/* This function informs the encoder or decoder that source matching (i.e.,
+ * delta-compression) is possible.  For encoding, this should be called before the first
+ * xd3_encode_input.  A NULL source is ignored.  For decoding, this should be called
+ * before the first window is decoded, but the appheader may be read first
+ * (XD3_GOTHEADER).  At this point, consult xd3_decoder_needs_source(), inlined below, to
+ * determine if a source is expected by the decoder. */
+int     xd3_set_source    (xd3_stream    *stream,
+			   xd3_source    *source);
+
+/* This function invokes xd3_encode_input using whole-file, in-memory inputs.  The output
+ * array must be large enough to hold the output or else ENOSPC is returned. */
+int     xd3_encode_completely (xd3_stream    *stream,
+			       const uint8_t *input,
+			       usize_t         input_size,
+			       uint8_t       *output,
+			       usize_t        *output_size,
+			       usize_t         avail_output);
+
+/* This function invokes xd3_decode_input using whole-file, in-memory inputs.  The output
+ * array must be large enough to hold the output or else ENOSPC is returned. */
+int     xd3_decode_completely (xd3_stream    *stream,
+			       const uint8_t *input,
+			       usize_t         input_size,
+			       uint8_t       *output,
+			       usize_t        *output_size,
+			       usize_t         avail_size);
+
+/* This should be called before the first call to xd3_encode_input() to include
+ * application-specific data in the VCDIFF header. */
+void    xd3_set_appheader (xd3_stream    *stream,
+			   const uint8_t *data,
+			   usize_t         size);
+
+/* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER.  For convenience,
+ * the decoder always adds a single byte padding to the end of the application header,
+ * which is set to zero in case the application header is a string. */
+int     xd3_get_appheader (xd3_stream     *stream,
+			   uint8_t       **data,
+			   usize_t         *size);
+
+/* After receiving XD3_GOTHEADER, the decoder should check this function which returns 1
+ * if the decoder will require source data. */
+int     xd3_decoder_needs_source (xd3_stream *stream);
+
+/* Also covers the return values (XD3_INPUT, XD3_OUTPUT, ...) described above. */
+const char* xd3_strerror (int ret);
+
+/* Convenience initializer: clears the whole xd3_config, then stores the given flags. */
+static inline
+void    xd3_init_config (xd3_config *config,
+			 int         flags)
+{
+  memset (config, 0, sizeof (xd3_config));  /* zero every byte before setting flags */
+  config->flags = flags;
+}
+
+/* Hands the stream its next input buffer: idata (never NULL) holding isize bytes. */
+static inline
+void    xd3_avail_input  (xd3_stream    *stream,
+			  const uint8_t *idata,
+			  usize_t         isize)
+{
+  /* A non-NULL idata is required even when isize is zero, because the pointer doubles as
+   * the "xd3_avail_input has been called" marker.  If xd3_encode_input runs before any
+   * call to xd3_avail_input, it returns XD3_INPUT immediately and does not allocate a
+   * stream->winsize buffer, which avoids an unwanted allocation. */
+  XD3_ASSERT (idata != NULL);
+
+  /* TODO: Should check for a call to xd3_avail_input in the wrong state. */
+  stream->avail_in = isize;
+  stream->next_in  = idata;
+}
+
+/* Acknowledges receipt of output data; must be called after any XD3_OUTPUT return. */
+static inline
+void xd3_consume_output (xd3_stream  *stream)
+{
+  /* TODO: Is it correct to zero avail_out here and then test it == 0 later?  NOTE(review):
+   * this note originally named avail_in, which is not touched here -- confirm the intent. */
+  stream->avail_out  = 0;
+}
+
+/* Source-window accessors; the values are set for each XD3_WINFINISH return.
+ * xd3_encoder_used_source() is non-zero only when a source is attached and srclen > 0.
+ * xd3_encoder_srcbase() and xd3_encoder_srclen() return 0 when stream->src is NULL
+ * instead of dereferencing it, matching the NULL check in xd3_encoder_used_source(). */
+static inline int     xd3_encoder_used_source (xd3_stream *stream) { return stream->src != NULL && stream->src->srclen > 0; }
+static inline xoff_t  xd3_encoder_srcbase (xd3_stream *stream) { return stream->src != NULL ? stream->src->srcbase : 0; }
+static inline usize_t xd3_encoder_srclen (xd3_stream *stream) { return stream->src != NULL ? stream->src->srclen : 0; }
+
+/* Replaces stream->flags with flags, after asserting that the change is a legal one. */
+static inline
+void xd3_set_flags (xd3_stream *stream, int flags)
+{
+  /* Bits that differ (XOR) between old and new flags may only be XD3_FLUSH or XD3_SKIP_WINDOW. */
+  XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0);
+  stream->flags = flags;
+}
+
+/* Gives some extra information about the latest library error, if any is known. */
+static inline
+const char* xd3_errstring (xd3_stream  *stream)
+{
+  return stream->msg ? stream->msg : "";
+}
+
+/* Returns how many bytes a getblk request for block blkno should leave in source->onblk:
+ * blksize for every block except the last, which may be partial. */
+static inline
+usize_t xd3_bytes_on_srcblk (xd3_source *source, xoff_t blkno)
+{
+  XD3_ASSERT (blkno < source->blocks);
+
+  if (blkno == source->blocks - 1)
+    {
+      return ((source->size - 1) % source->blksize) + 1;  /* last block: 1..blksize bytes */
+    }
+
+  return source->blksize;
+}
+
+#endif /* _XDELTA3_H_ */
diff --git a/xdelta3/xdelta3.prj b/xdelta3/xdelta3.prj
new file mode 100755
index 0000000..df1a445
--- /dev/null
+++ b/xdelta3/xdelta3.prj
@@ -0,0 +1,133 @@
+;; -*- Prcs -*-
+(Created-By-Prcs-Version 1 3 3)
+(Project-Description "")
+(Project-Version xdelta3 0 5)
+(Parent-Version xdelta3 0 4)
+(Version-Log "write a bit of documentation, work to fix/clean the regression test, fixed one actual bug in xd3_stream_close()")
+(New-Version-Log "")
+(Checkin-Time "Sun, 30 May 2004 14:42:47 -0700")
+(Checkin-Login jmacd)
+(Populate-Ignore ())
+(Project-Keywords
+ (WWWLeftNavBar "<table cellpadding=\"20px\" width=700> <tr> <td class=\"leftbdr\" valign=top height=600 width=100> <div class=\"leftbody\"> <h1>Xdelta</h1> <a href=\"xdelta3.html\">overview</a><br> <a href=\"xdelta3-cmdline.html\">command&nbsp;line</a><br> <a href=\"xdelta3-api-guide.html\">api&nbsp;guide</a><br> <br><a href=\"http://xdelta.org\">xdelta.org</a></h2> </div> </td> <td valign=top width=500>")
+ )
+(Files
+
+;; Files added by populate at Sun, 20 Jul 2003 04:22:04 +0400,
+;; to version 0.0(w), by jmacd:
+
+  (Makefile (xdelta3/0_Makefile 1.3 644))
+  (xdelta3.h (xdelta3/1_xdelta3.h 1.2 644))
+  (xdelta3-test.h (xdelta3/2_xdelta3-te 1.2 644))
+  (xdelta3-second.h (xdelta3/3_xdelta3-se 1.1 644))
+  (xdelta3-python.h (xdelta3/4_xdelta3-py 1.1 644))
+  (xdelta3-main.h (xdelta3/5_xdelta3-ma 1.3 644))
+  (xdelta3-list.h (xdelta3/6_xdelta3-li 1.1 644))
+  (xdelta3-fgk.h (xdelta3/7_xdelta3-fg 1.1 644))
+  (xdelta3-djw.h (xdelta3/8_xdelta3-dj 1.1 644))
+  (xdelta3-cfgs.h (xdelta3/9_xdelta3-cf 1.1 644))
+  (xdelta3-regtest.py (xdelta3/10_xdelta3-re 1.3 755))
+  (setup.py (xdelta3/11_setup.py 1.1 644))
+  (analyze_pfx.py (xdelta3/12_analyze_pf 1.1 644))
+  (analyze_gp.py (xdelta3/13_analyze_gp 1.1 644))
+  (analyze_clen.py (xdelta3/14_analyze_cl 1.1 644))
+  (rcs_junk.cc (xdelta3/15_rcs_junk.c 1.1 644))
+  (xdelta3.c (xdelta3/16_xdelta3.c 1.3 644))
+  (testh.c (xdelta3/17_testh.c 1.1 644))
+  (show.c (xdelta3/18_show.c 1.1 644))
+  (linkxd3lib.c (xdelta3/19_linkxd3lib 1.1 644))
+  (badcopy.c (xdelta3/20_badcopy.c 1.1 644))
+
+;; Files added by populate at Sun, 20 Jul 2003 04:22:08 +0400,
+;; to version 0.0(w), by jmacd:
+
+  (save.regtest.bug9/foo,v (xdelta3/21_foo,vx 1.1 444) :no-keywords)
+  (save.regtest.bug9/foo2,v (xdelta3/22_foo2,vx 1.1 444) :no-keywords)
+  (save.regtest.bug8/core (xdelta3/23_core 1.1 600) :no-keywords)
+  (save.regtest.bug8/output.x.right (xdelta3/24_output.x.r 1.1 644) :no-keywords)
+  (save.regtest.bug8/output.x (xdelta3/25_output.x 1.1 644) :no-keywords)
+  (save.regtest.bug8/input.1.xz (xdelta3/26_input.1.xz 1.1 644) :no-keywords)
+  (save.regtest.bug8/input.0.xz (xdelta3/27_input.0.xz 1.1 644) :no-keywords)
+  (save.regtest.bug8/input.0 (xdelta3/28_input.0 1.1 644))
+  (save.regtest.bug8/input.1 (xdelta3/29_input.1 1.1 644))
+  (save.regtest.bug7/core (xdelta3/30_core 1.1 600) :no-keywords)
+  (save.regtest.bug7/recon.x (xdelta3/31_recon.x 1.1 644) :no-keywords)
+  (save.regtest.bug7/output.x (xdelta3/32_output.x 1.1 644) :no-keywords)
+  (save.regtest.bug7/input.1 (xdelta3/33_input.1 1.1 644) :no-keywords)
+  (save.regtest.bug7/input.0 (xdelta3/34_input.0 1.1 644) :no-keywords)
+  (save.regtest.bug7/output (xdelta3/35_output 1.1 644) :no-keywords)
+  (save.regtest.bug7/recon (xdelta3/36_recon 1.1 644) :no-keywords)
+  (save.regtest.bug6/recon (xdelta3/37_recon 1.1 644))
+  (save.regtest.bug6/input.21 (xdelta3/38_input.21 1.1 644))
+  (save.regtest.bug6/input.20 (xdelta3/39_input.20 1.1 644))
+  (save.regtest.bug6/input.0 (xdelta3/40_input.0 1.1 644))
+  (save.regtest.bug6/output (xdelta3/41_output 1.1 644) :no-keywords)
+  (save.regtest.bug5/input.1 (xdelta3/42_input.1 1.1 644) :no-keywords)
+  (save.regtest.bug5/input.0 (xdelta3/43_input.0 1.1 644) :no-keywords)
+  (save.regtest.bug4/input.1 (xdelta3/44_input.1 1.1 644) :no-keywords)
+  (save.regtest.bug4/input.0 (xdelta3/45_input.0 1.1 644) :no-keywords)
+  (save.regtest.bug3/input.1 (xdelta3/46_input.1 1.1 644))
+  (save.regtest.bug3/input.0 (xdelta3/47_input.0 1.1 644))
+  (save.regtest.bug2/input.1 (xdelta3/48_input.1 1.1 644) :no-keywords)
+  (save.regtest.bug2/input.0 (xdelta3/49_input.0 1.1 644) :no-keywords)
+  (save.regtest.bug12/xd3regtest.27181/input.1 (xdelta3/50_input.1 1.1 644) :no-keywords)
+  (save.regtest.bug12/xd3regtest.27181/input.0 (xdelta3/51_input.0 1.1 644) :no-keywords)
+  (save.regtest.bug12/xd3regtest.27181/output (xdelta3/b/0_output 1.1 644) :no-keywords)
+  (save.regtest.bug12/output.x (xdelta3/b/1_output.x 1.1 644) :no-keywords)
+  (save.regtest.bug12/input.1 (xdelta3/b/2_input.1 1.1 644))
+  (save.regtest.bug12/input.0 (xdelta3/b/3_input.0 1.1 644))
+  (save.regtest.bug12/output (xdelta3/b/4_output 1.1 644) :no-keywords)
+  (save.regtest.bug11/recon.x (xdelta3/b/5_recon.x 1.1 644))
+  (save.regtest.bug11/output.x (xdelta3/b/6_output.x 1.1 644) :no-keywords)
+  (save.regtest.bug11/input.1 (xdelta3/b/7_input.1 1.1 644))
+  (save.regtest.bug11/input.0 (xdelta3/b/8_input.0 1.1 644))
+  (save.regtest.bug11/output (xdelta3/b/9_output 1.1 644) :no-keywords)
+  (save.regtest.bug11/recon (xdelta3/b/10_recon 1.1 644))
+  (save.regtest.bug10/recon.x (xdelta3/b/11_recon.x 1.1 644) :no-keywords)
+  (save.regtest.bug10/output.x (xdelta3/b/12_output.x 1.1 644) :no-keywords)
+  (save.regtest.bug10/input.1 (xdelta3/b/13_input.1 1.1 644) :no-keywords)
+  (save.regtest.bug10/input.0 (xdelta3/b/14_input.0 1.1 644) :no-keywords)
+  (save.regtest.bug10/output (xdelta3/b/15_output 1.1 644) :no-keywords)
+  (save.regtest.bug1/input.4 (xdelta3/b/16_input.4 1.1 644))
+  (save.regtest.bug1/input.5 (xdelta3/b/17_input.5 1.1 644))
+
+;; Files added by populate at Sun, 20 Jul 2003 04:22:28 +0400,
+;; to version 0.0(w), by jmacd:
+
+  (priorities.txt (xdelta3/b/18_priorities 1.1 644))
+
+;; Files added by populate at Sun, 20 Jul 2003 04:22:40 +0400,
+;; to version 0.0(w), by jmacd:
+
+  (vcdiff.ps (xdelta3/b/19_vcdiff.ps 1.1 644))
+  (draft-vcdiff-huffman.txt (xdelta3/b/20_draft-vcdi 1.1 600))
+
+;; Files added by populate at Sun, 20 Jul 2003 04:22:59 +0400,
+;; to version 0.0(w), by jmacd:
+
+  (dead.code (xdelta3/b/21_dead.code 1.2 644))
+
+;; Files added by populate at Sun, 20 Jul 2003 04:23:05 +0400,
+;; to version 0.0(w), by jmacd:
+
+  (draft-korn-vcdiff.txt (xdelta3/b/22_draft-korn 1.1 600))
+
+;; Files added by populate at Sun, 20 Jul 2003 08:16:41 +0400,
+;; to version 0.1(w), by jmacd:
+
+  (www/xdelta3-api-guide.html (xdelta3/b/23_Xdelta3-ap 1.4 644))
+  (www/xdelta3.html (xdelta3/b/24_Xdelta3.ht 1.4 644))
+
+;; Files added by populate at Sun, 20 Jul 2003 22:35:48 +0400,
+;; to version 0.2(w), by jmacd:
+
+  (www/xdelta3-cmdline.html (xdelta3/b/25_xdelta3-cm 1.2 644))
+  (www/xdelta3.css (xdelta3/b/26_xdelta3.cs 1.3 644))
+
+;; Files added by populate at Wed, 21 Jul 2004 15:39:04 -0700,
+;; to version 0.5(w), by jmacd:
+
+  (gpl.txt ())
+)
+(Merge-Parents)
+(New-Merge-Parents)
-- 
cgit v1.2.3