summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore18
-rw-r--r--xdelta3/Makefile.all40
-rw-r--r--xdelta3/Makefile.am16
-rw-r--r--xdelta3/configure.ac1
-rw-r--r--xdelta3/cpp-btree/CMakeLists.txt40
-rw-r--r--xdelta3/cpp-btree/COPYING202
-rw-r--r--xdelta3/cpp-btree/README31
-rw-r--r--xdelta3/cpp-btree/btree.h2394
-rw-r--r--xdelta3/cpp-btree/btree_bench.cc593
-rw-r--r--xdelta3/cpp-btree/btree_container.h349
-rw-r--r--xdelta3/cpp-btree/btree_map.h130
-rw-r--r--xdelta3/cpp-btree/btree_set.h121
-rw-r--r--xdelta3/cpp-btree/btree_test.cc270
-rw-r--r--xdelta3/cpp-btree/btree_test.h940
-rw-r--r--xdelta3/cpp-btree/btree_test_flags.cc20
-rw-r--r--xdelta3/cpp-btree/safe_btree.h395
-rw-r--r--xdelta3/cpp-btree/safe_btree_map.h89
-rw-r--r--xdelta3/cpp-btree/safe_btree_set.h88
-rw-r--r--xdelta3/cpp-btree/safe_btree_test.cc116
-rw-r--r--xdelta3/examples/checksum_test.cc732
-rw-r--r--xdelta3/go/src/regtest.go274
-rw-r--r--xdelta3/go/src/xdelta/rstream.go71
-rw-r--r--xdelta3/go/src/xdelta/run.go71
-rw-r--r--xdelta3/go/src/xdelta/test.go164
-rw-r--r--xdelta3/go/src/xdelta/tgroup.go97
l---------xdelta3/py-compile1
-rwxr-xr-xxdelta3/run_release.sh18
-rw-r--r--xdelta3/testing/Makefile3
-rw-r--r--xdelta3/testing/checksum_test.cc756
-rw-r--r--xdelta3/testing/checksum_test_c.c174
-rw-r--r--xdelta3/testing/delta.h6
-rw-r--r--xdelta3/testing/regtest.cc4
-rwxr-xr-xxdelta3/testing/run_release.sh2
-rw-r--r--xdelta3/testing/test.h6
-rw-r--r--xdelta3/xdelta3-blkcache.h19
-rw-r--r--xdelta3/xdelta3-decode.h30
-rw-r--r--xdelta3/xdelta3-djw.h12
-rw-r--r--xdelta3/xdelta3-fgk.h12
-rw-r--r--xdelta3/xdelta3-hash.h157
-rw-r--r--xdelta3/xdelta3-internal.h153
-rw-r--r--xdelta3/xdelta3-list.h2
-rw-r--r--xdelta3/xdelta3-lzma.h3
-rw-r--r--xdelta3/xdelta3-main.h101
-rw-r--r--xdelta3/xdelta3-second.h2
-rw-r--r--xdelta3/xdelta3-test.h83
-rw-r--r--xdelta3/xdelta3.c291
-rw-r--r--xdelta3/xdelta3.h133
47 files changed, 8058 insertions, 1172 deletions
diff --git a/.gitignore b/.gitignore
index 26fa702..a5b5926 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,22 +1,32 @@
1.dirstamp 1*.o
2*~
2.deps 3.deps
4.dirstamp
3INSTALL 5INSTALL
4Makefile 6Makefile
5Makefile.in 7Makefile.in
6aclocal.m4 8aclocal.m4
7autom4te.cache 9autom4te.cache
10build
8compile 11compile
9config.guess 12config.guess
10config.h 13config.h
11config.h.in~ 14config.h.in
12config.log 15config.log
13config.status 16config.status
14config.sub 17config.sub
18config.sub
15configure 19configure
16depcomp 20depcomp
17libtool 21libtool
22libtool.m4
23ltmain.sh
24ltoptions.m4
25ltsugar.m4
26ltversion.m4
27lt~obsolete.m4
28missing
18stamp-h1 29stamp-h1
19*.o 30xdelta3/xdelta3
20xdelta3
21xdelta3decode 31xdelta3decode
22xdelta3regtest 32xdelta3regtest
diff --git a/xdelta3/Makefile.all b/xdelta3/Makefile.all
new file mode 100644
index 0000000..c8e8cc0
--- /dev/null
+++ b/xdelta3/Makefile.all
@@ -0,0 +1,40 @@
1# -*- Mode: Makefile -*-
2all: 32_32_32 32_32_64 32_64_64 64_32_32 64_32_64 64_64_64
3
432_32_32:
5 (cd ./build/m32/32size-32off && $(MAKE) all)
6
732_32_64:
8 (cd ./build/m32/32size-64off && $(MAKE) all)
9
1032_64_64:
11 (cd ./build/m32/64size-64off && $(MAKE) all)
12
1364_32_32:
14 (cd ./build/m64/32size-32off && $(MAKE) all)
15
1664_32_64:
17 (cd ./build/m64/32size-64off && $(MAKE) all)
18
1964_64_64:
20 (cd ./build/m64/64size-64off && $(MAKE) all)
21
22clean: 32_32_32_clean 32_32_64_clean 32_64_64_clean 64_32_32_clean 64_32_64_clean 64_64_64_clean
23
2432_32_32_clean:
25 (cd ./build/m32/32size-32off && $(MAKE) clean)
26
2732_32_64_clean:
28 (cd ./build/m32/32size-64off && $(MAKE) clean)
29
3032_64_64_clean:
31 (cd ./build/m32/64size-64off && $(MAKE) clean)
32
3364_32_32_clean:
34 (cd ./build/m64/32size-32off && $(MAKE) clean)
35
3664_32_64_clean:
37 (cd ./build/m64/32size-64off && $(MAKE) clean)
38
3964_64_64_clean:
40 (cd ./build/m64/64size-64off && $(MAKE) clean)
diff --git a/xdelta3/Makefile.am b/xdelta3/Makefile.am
index 662ff02..f437764 100644
--- a/xdelta3/Makefile.am
+++ b/xdelta3/Makefile.am
@@ -1,7 +1,8 @@
1ACLOCAL_AMFLAGS = -I m4 1ACLOCAL_AMFLAGS = -I m4
2AUTOMAKE_OPTIONS = subdir-objects
2 3
3bin_PROGRAMS = xdelta3 4bin_PROGRAMS = xdelta3
4noinst_PROGRAMS = xdelta3regtest xdelta3decode 5noinst_PROGRAMS = xdelta3regtest xdelta3decode xdelta3checksum
5 6
6export AFL_HARDEN 7export AFL_HARDEN
7 8
@@ -37,14 +38,19 @@ xdelta3regtest_SOURCES = $(common_SOURCES) \
37 testing/sizes.h \ 38 testing/sizes.h \
38 testing/test.h 39 testing/test.h
39 40
41xdelta3checksum_SOURCES = $(common_SOURCES) \
42 testing/checksum_test.cc \
43 testing/checksum_test_c.c
44
40# Note: for extra sanity checks, enable -Wconversion. Note there 45# Note: for extra sanity checks, enable -Wconversion. Note there
41# are a lot of false positives. 46# are a lot of false positives.
42WFLAGS = -Wall -Wshadow -fno-builtin -Wextra -Wsign-compare \ 47WFLAGS = -Wall -Wshadow -fno-builtin -Wextra -Wsign-compare \
43 -Wextra -Wno-unused-parameter -Wno-unused-function 48 -Wno-unused-parameter -Wformat -Wno-unused-function # -Wconversion
44 49
45C_WFLAGS = $(WFLAGS) -pedantic -std=c99 50C_WFLAGS = $(WFLAGS) -pedantic -std=c99
46CXX_WFLAGS = $(WFLAGS) 51CXX_WFLAGS = $(WFLAGS)
47 52
53# TODO add -O3
48common_CFLAGS = \ 54common_CFLAGS = \
49 -DREGRESSION_TEST=1 \ 55 -DREGRESSION_TEST=1 \
50 -DSECONDARY_DJW=1 \ 56 -DSECONDARY_DJW=1 \
@@ -80,6 +86,12 @@ xdelta3regtest_CFLAGS = \
80 $(C_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_DEBUG=1 86 $(C_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_DEBUG=1
81xdelta3regtest_LDADD = -lm 87xdelta3regtest_LDADD = -lm
82 88
89xdelta3checksum_CXXFLAGS = \
90 $(CXX_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_MAIN=1 -std=c++11
91xdelta3checksum_CFLAGS = \
92 $(C_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_MAIN=1
93
94
83man1_MANS = xdelta3.1 95man1_MANS = xdelta3.1
84 96
85EXTRA_DIST = \ 97EXTRA_DIST = \
diff --git a/xdelta3/configure.ac b/xdelta3/configure.ac
index ce24045..dbcac4f 100644
--- a/xdelta3/configure.ac
+++ b/xdelta3/configure.ac
@@ -44,6 +44,7 @@ AC_ARG_ENABLE(debug-symbols,
44 AS_HELP_STRING(--enable-debug-symbols,[Build with debug symbols (default is NO)]),,enableval=no) 44 AS_HELP_STRING(--enable-debug-symbols,[Build with debug symbols (default is NO)]),,enableval=no)
45AM_CONDITIONAL([DEBUG_SYMBOLS], [test ${enableval} = "yes"]) 45AM_CONDITIONAL([DEBUG_SYMBOLS], [test ${enableval} = "yes"])
46 46
47
47AC_CONFIG_HEADERS([config.h]) 48AC_CONFIG_HEADERS([config.h])
48AC_CONFIG_FILES([Makefile]) 49AC_CONFIG_FILES([Makefile])
49AC_OUTPUT 50AC_OUTPUT
diff --git a/xdelta3/cpp-btree/CMakeLists.txt b/xdelta3/cpp-btree/CMakeLists.txt
new file mode 100644
index 0000000..d005e15
--- /dev/null
+++ b/xdelta3/cpp-btree/CMakeLists.txt
@@ -0,0 +1,40 @@
1# Copyright 2013 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15cmake_minimum_required(VERSION 2.6)
16
17project(cppbtree CXX)
18
19option(build_tests "Build B-tree tests" OFF)
20add_definitions(-std=c++11)
21set(CMAKE_CXX_FLAGS "-g -O2")
22
23# CMake doesn't have a way to define a pure template (header-only) library,
24# add_library(cppbtree btree.h btree_map.h btree_set.h
25# safe_btree.h safe_btree_map.h safe_btree_set.h)
26# set_target_properties(cppbtree PROPERTIES LINKER_LANGUAGE CXX)
27
28if(build_tests)
29 enable_testing()
30 include_directories($ENV{GTEST_ROOT}/include)
31 link_directories($ENV{GTEST_ROOT})
32 include_directories($ENV{GFLAGS_ROOT}/include)
33 link_directories($ENV{GFLAGS_ROOT}/lib)
34 add_executable(btree_test btree_test.cc btree_test_flags.cc)
35 add_executable(safe_btree_test safe_btree_test.cc btree_test_flags.cc)
36 add_executable(btree_bench btree_bench.cc btree_test_flags.cc)
37 target_link_libraries(btree_test gtest_main gtest gflags)
38 target_link_libraries(safe_btree_test gtest_main gtest gflags)
39 target_link_libraries(btree_bench gflags gtest)
40endif()
diff --git a/xdelta3/cpp-btree/COPYING b/xdelta3/cpp-btree/COPYING
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/xdelta3/cpp-btree/COPYING
@@ -0,0 +1,202 @@
1
2 Apache License
3 Version 2.0, January 2004
4 http://www.apache.org/licenses/
5
6 TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
8 1. Definitions.
9
10 "License" shall mean the terms and conditions for use, reproduction,
11 and distribution as defined by Sections 1 through 9 of this document.
12
13 "Licensor" shall mean the copyright owner or entity authorized by
14 the copyright owner that is granting the License.
15
16 "Legal Entity" shall mean the union of the acting entity and all
17 other entities that control, are controlled by, or are under common
18 control with that entity. For the purposes of this definition,
19 "control" means (i) the power, direct or indirect, to cause the
20 direction or management of such entity, whether by contract or
21 otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 outstanding shares, or (iii) beneficial ownership of such entity.
23
24 "You" (or "Your") shall mean an individual or Legal Entity
25 exercising permissions granted by this License.
26
27 "Source" form shall mean the preferred form for making modifications,
28 including but not limited to software source code, documentation
29 source, and configuration files.
30
31 "Object" form shall mean any form resulting from mechanical
32 transformation or translation of a Source form, including but
33 not limited to compiled object code, generated documentation,
34 and conversions to other media types.
35
36 "Work" shall mean the work of authorship, whether in Source or
37 Object form, made available under the License, as indicated by a
38 copyright notice that is included in or attached to the work
39 (an example is provided in the Appendix below).
40
41 "Derivative Works" shall mean any work, whether in Source or Object
42 form, that is based on (or derived from) the Work and for which the
43 editorial revisions, annotations, elaborations, or other modifications
44 represent, as a whole, an original work of authorship. For the purposes
45 of this License, Derivative Works shall not include works that remain
46 separable from, or merely link (or bind by name) to the interfaces of,
47 the Work and Derivative Works thereof.
48
49 "Contribution" shall mean any work of authorship, including
50 the original version of the Work and any modifications or additions
51 to that Work or Derivative Works thereof, that is intentionally
52 submitted to Licensor for inclusion in the Work by the copyright owner
53 or by an individual or Legal Entity authorized to submit on behalf of
54 the copyright owner. For the purposes of this definition, "submitted"
55 means any form of electronic, verbal, or written communication sent
56 to the Licensor or its representatives, including but not limited to
57 communication on electronic mailing lists, source code control systems,
58 and issue tracking systems that are managed by, or on behalf of, the
59 Licensor for the purpose of discussing and improving the Work, but
60 excluding communication that is conspicuously marked or otherwise
61 designated in writing by the copyright owner as "Not a Contribution."
62
63 "Contributor" shall mean Licensor and any individual or Legal Entity
64 on behalf of whom a Contribution has been received by Licensor and
65 subsequently incorporated within the Work.
66
67 2. Grant of Copyright License. Subject to the terms and conditions of
68 this License, each Contributor hereby grants to You a perpetual,
69 worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 copyright license to reproduce, prepare Derivative Works of,
71 publicly display, publicly perform, sublicense, and distribute the
72 Work and such Derivative Works in Source or Object form.
73
74 3. Grant of Patent License. Subject to the terms and conditions of
75 this License, each Contributor hereby grants to You a perpetual,
76 worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 (except as stated in this section) patent license to make, have made,
78 use, offer to sell, sell, import, and otherwise transfer the Work,
79 where such license applies only to those patent claims licensable
80 by such Contributor that are necessarily infringed by their
81 Contribution(s) alone or by combination of their Contribution(s)
82 with the Work to which such Contribution(s) was submitted. If You
83 institute patent litigation against any entity (including a
84 cross-claim or counterclaim in a lawsuit) alleging that the Work
85 or a Contribution incorporated within the Work constitutes direct
86 or contributory patent infringement, then any patent licenses
87 granted to You under this License for that Work shall terminate
88 as of the date such litigation is filed.
89
90 4. Redistribution. You may reproduce and distribute copies of the
91 Work or Derivative Works thereof in any medium, with or without
92 modifications, and in Source or Object form, provided that You
93 meet the following conditions:
94
95 (a) You must give any other recipients of the Work or
96 Derivative Works a copy of this License; and
97
98 (b) You must cause any modified files to carry prominent notices
99 stating that You changed the files; and
100
101 (c) You must retain, in the Source form of any Derivative Works
102 that You distribute, all copyright, patent, trademark, and
103 attribution notices from the Source form of the Work,
104 excluding those notices that do not pertain to any part of
105 the Derivative Works; and
106
107 (d) If the Work includes a "NOTICE" text file as part of its
108 distribution, then any Derivative Works that You distribute must
109 include a readable copy of the attribution notices contained
110 within such NOTICE file, excluding those notices that do not
111 pertain to any part of the Derivative Works, in at least one
112 of the following places: within a NOTICE text file distributed
113 as part of the Derivative Works; within the Source form or
114 documentation, if provided along with the Derivative Works; or,
115 within a display generated by the Derivative Works, if and
116 wherever such third-party notices normally appear. The contents
117 of the NOTICE file are for informational purposes only and
118 do not modify the License. You may add Your own attribution
119 notices within Derivative Works that You distribute, alongside
120 or as an addendum to the NOTICE text from the Work, provided
121 that such additional attribution notices cannot be construed
122 as modifying the License.
123
124 You may add Your own copyright statement to Your modifications and
125 may provide additional or different license terms and conditions
126 for use, reproduction, or distribution of Your modifications, or
127 for any such Derivative Works as a whole, provided Your use,
128 reproduction, and distribution of the Work otherwise complies with
129 the conditions stated in this License.
130
131 5. Submission of Contributions. Unless You explicitly state otherwise,
132 any Contribution intentionally submitted for inclusion in the Work
133 by You to the Licensor shall be under the terms and conditions of
134 this License, without any additional terms or conditions.
135 Notwithstanding the above, nothing herein shall supersede or modify
136 the terms of any separate license agreement you may have executed
137 with Licensor regarding such Contributions.
138
139 6. Trademarks. This License does not grant permission to use the trade
140 names, trademarks, service marks, or product names of the Licensor,
141 except as required for reasonable and customary use in describing the
142 origin of the Work and reproducing the content of the NOTICE file.
143
144 7. Disclaimer of Warranty. Unless required by applicable law or
145 agreed to in writing, Licensor provides the Work (and each
146 Contributor provides its Contributions) on an "AS IS" BASIS,
147 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 implied, including, without limitation, any warranties or conditions
149 of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 PARTICULAR PURPOSE. You are solely responsible for determining the
151 appropriateness of using or redistributing the Work and assume any
152 risks associated with Your exercise of permissions under this License.
153
154 8. Limitation of Liability. In no event and under no legal theory,
155 whether in tort (including negligence), contract, or otherwise,
156 unless required by applicable law (such as deliberate and grossly
157 negligent acts) or agreed to in writing, shall any Contributor be
158 liable to You for damages, including any direct, indirect, special,
159 incidental, or consequential damages of any character arising as a
160 result of this License or out of the use or inability to use the
161 Work (including but not limited to damages for loss of goodwill,
162 work stoppage, computer failure or malfunction, or any and all
163 other commercial damages or losses), even if such Contributor
164 has been advised of the possibility of such damages.
165
166 9. Accepting Warranty or Additional Liability. While redistributing
167 the Work or Derivative Works thereof, You may choose to offer,
168 and charge a fee for, acceptance of support, warranty, indemnity,
169 or other liability obligations and/or rights consistent with this
170 License. However, in accepting such obligations, You may act only
171 on Your own behalf and on Your sole responsibility, not on behalf
172 of any other Contributor, and only if You agree to indemnify,
173 defend, and hold each Contributor harmless for any liability
174 incurred by, or claims asserted against, such Contributor by reason
175 of your accepting any such warranty or additional liability.
176
177 END OF TERMS AND CONDITIONS
178
179 APPENDIX: How to apply the Apache License to your work.
180
181 To apply the Apache License to your work, attach the following
182 boilerplate notice, with the fields enclosed by brackets "[]"
183 replaced with your own identifying information. (Don't include
184 the brackets!) The text should be enclosed in the appropriate
185 comment syntax for the file format. We also recommend that a
186 file or class name and description of purpose be included on the
187 same "printed page" as the copyright notice for easier
188 identification within third-party archives.
189
190 Copyright [yyyy] [name of copyright owner]
191
192 Licensed under the Apache License, Version 2.0 (the "License");
193 you may not use this file except in compliance with the License.
194 You may obtain a copy of the License at
195
196 http://www.apache.org/licenses/LICENSE-2.0
197
198 Unless required by applicable law or agreed to in writing, software
199 distributed under the License is distributed on an "AS IS" BASIS,
200 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 See the License for the specific language governing permissions and
202 limitations under the License.
diff --git a/xdelta3/cpp-btree/README b/xdelta3/cpp-btree/README
new file mode 100644
index 0000000..319fe9b
--- /dev/null
+++ b/xdelta3/cpp-btree/README
@@ -0,0 +1,31 @@
1This library is a C++ template library and, as such, there is no
2library to build and install. Copy the .h files and use them!
3
4See http://code.google.com/p/cpp-btree/wiki/UsageInstructions for
5details.
6
7----
8
9To build and run the provided tests, however, you will need to install
10CMake, the Google C++ Test framework, and the Google flags package.
11
12Download and install CMake from http://www.cmake.org
13
14Download and build the GoogleTest framework from
15http://code.google.com/p/googletest
16
17Download and install gflags from https://code.google.com/p/gflags
18
19Set GTEST_ROOT to the directory where GTEST was built.
20Set GFLAGS_ROOT to the directory prefix where GFLAGS is installed.
21
22export GTEST_ROOT=/path/for/gtest-x.y
23export GFLAGS_ROOT=/opt
24
25cmake . -Dbuild_tests=ON
26
27For example, to build on a Unix system with the clang++ compiler,
28
29export GTEST_ROOT=$HOME/src/googletest
30export GFLAGS_ROOT=/opt
31cmake . -G "Unix Makefiles" -Dbuild_tests=ON -DCMAKE_CXX_COMPILER=clang++
diff --git a/xdelta3/cpp-btree/btree.h b/xdelta3/cpp-btree/btree.h
new file mode 100644
index 0000000..cdd2b52
--- /dev/null
+++ b/xdelta3/cpp-btree/btree.h
@@ -0,0 +1,2394 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// A btree implementation of the STL set and map interfaces. A btree is both
16// smaller and faster than STL set/map. The red-black tree implementation of
17// STL set/map has an overhead of 3 pointers (left, right and parent) plus the
18// node color information for each stored value. So a set<int32> consumes 20
19// bytes for each value stored. This btree implementation stores multiple
20// values on fixed size nodes (usually 256 bytes) and doesn't store child
21// pointers for leaf nodes. The result is that a btree_set<int32> may use much
22// less memory per stored value. For the random insertion benchmark in
23// btree_test.cc, a btree_set<int32> with node-size of 256 uses 4.9 bytes per
24// stored value.
25//
26// The packing of multiple values on to each node of a btree has another effect
27// besides better space utilization: better cache locality due to fewer cache
28// lines being accessed. Better cache locality translates into faster
29// operations.
30//
31// CAVEATS
32//
33// Insertions and deletions on a btree can cause splitting, merging or
34// rebalancing of btree nodes. And even without these operations, insertions
35// and deletions on a btree will move values around within a node. In both
36// cases, the result is that insertions and deletions can invalidate iterators
37// pointing to values other than the one being inserted/deleted. This is
38// notably different from STL set/map which takes care to not invalidate
39// iterators on insert/erase except, of course, for iterators pointing to the
40// value being erased. A partial workaround when erasing is available:
41// erase() returns an iterator pointing to the item just after the one that was
42// erased (or end() if none exists). See also safe_btree.
43
44// PERFORMANCE
45//
46// btree_bench --benchmarks=. 2>&1 | ./benchmarks.awk
47//
48// Run on pmattis-warp.nyc (4 X 2200 MHz CPUs); 2010/03/04-15:23:06
49// Benchmark STL(ns) B-Tree(ns) @ <size>
50// --------------------------------------------------------
51// BM_set_int32_insert 1516 608 +59.89% <256> [40.0, 5.2]
52// BM_set_int32_lookup 1160 414 +64.31% <256> [40.0, 5.2]
53// BM_set_int32_fulllookup 960 410 +57.29% <256> [40.0, 4.4]
54// BM_set_int32_delete 1741 528 +69.67% <256> [40.0, 5.2]
55// BM_set_int32_queueaddrem 3078 1046 +66.02% <256> [40.0, 5.5]
56// BM_set_int32_mixedaddrem 3600 1384 +61.56% <256> [40.0, 5.3]
57// BM_set_int32_fifo 227 113 +50.22% <256> [40.0, 4.4]
58// BM_set_int32_fwditer 158 26 +83.54% <256> [40.0, 5.2]
59// BM_map_int32_insert 1551 636 +58.99% <256> [48.0, 10.5]
60// BM_map_int32_lookup 1200 508 +57.67% <256> [48.0, 10.5]
61// BM_map_int32_fulllookup 989 487 +50.76% <256> [48.0, 8.8]
62// BM_map_int32_delete 1794 628 +64.99% <256> [48.0, 10.5]
63// BM_map_int32_queueaddrem 3189 1266 +60.30% <256> [48.0, 11.6]
64// BM_map_int32_mixedaddrem 3822 1623 +57.54% <256> [48.0, 10.9]
65// BM_map_int32_fifo 151 134 +11.26% <256> [48.0, 8.8]
66// BM_map_int32_fwditer 161 32 +80.12% <256> [48.0, 10.5]
67// BM_set_int64_insert 1546 636 +58.86% <256> [40.0, 10.5]
68// BM_set_int64_lookup 1200 512 +57.33% <256> [40.0, 10.5]
69// BM_set_int64_fulllookup 971 487 +49.85% <256> [40.0, 8.8]
70// BM_set_int64_delete 1745 616 +64.70% <256> [40.0, 10.5]
71// BM_set_int64_queueaddrem 3163 1195 +62.22% <256> [40.0, 11.6]
72// BM_set_int64_mixedaddrem 3760 1564 +58.40% <256> [40.0, 10.9]
73// BM_set_int64_fifo 146 103 +29.45% <256> [40.0, 8.8]
74// BM_set_int64_fwditer 162 31 +80.86% <256> [40.0, 10.5]
75// BM_map_int64_insert 1551 720 +53.58% <256> [48.0, 20.7]
76// BM_map_int64_lookup 1214 612 +49.59% <256> [48.0, 20.7]
77// BM_map_int64_fulllookup 994 592 +40.44% <256> [48.0, 17.2]
78// BM_map_int64_delete 1778 764 +57.03% <256> [48.0, 20.7]
79// BM_map_int64_queueaddrem 3189 1547 +51.49% <256> [48.0, 20.9]
80// BM_map_int64_mixedaddrem 3779 1887 +50.07% <256> [48.0, 21.6]
81// BM_map_int64_fifo 147 145 +1.36% <256> [48.0, 17.2]
82// BM_map_int64_fwditer 162 41 +74.69% <256> [48.0, 20.7]
83// BM_set_string_insert 1989 1966 +1.16% <256> [64.0, 44.5]
84// BM_set_string_lookup 1709 1600 +6.38% <256> [64.0, 44.5]
85// BM_set_string_fulllookup 1573 1529 +2.80% <256> [64.0, 35.4]
86// BM_set_string_delete 2520 1920 +23.81% <256> [64.0, 44.5]
87// BM_set_string_queueaddrem 4706 4309 +8.44% <256> [64.0, 48.3]
88// BM_set_string_mixedaddrem 5080 4654 +8.39% <256> [64.0, 46.7]
89// BM_set_string_fifo 318 512 -61.01% <256> [64.0, 35.4]
90// BM_set_string_fwditer 182 93 +48.90% <256> [64.0, 44.5]
91// BM_map_string_insert 2600 2227 +14.35% <256> [72.0, 55.8]
92// BM_map_string_lookup 2068 1730 +16.34% <256> [72.0, 55.8]
93// BM_map_string_fulllookup 1859 1618 +12.96% <256> [72.0, 44.0]
94// BM_map_string_delete 3168 2080 +34.34% <256> [72.0, 55.8]
95// BM_map_string_queueaddrem 5840 4701 +19.50% <256> [72.0, 59.4]
96// BM_map_string_mixedaddrem 6400 5200 +18.75% <256> [72.0, 57.8]
97// BM_map_string_fifo 398 596 -49.75% <256> [72.0, 44.0]
98// BM_map_string_fwditer 243 113 +53.50% <256> [72.0, 55.8]
99
100#ifndef UTIL_BTREE_BTREE_H__
101#define UTIL_BTREE_BTREE_H__
102
103#include <assert.h>
104#include <stddef.h>
105#include <string.h>
106#include <sys/types.h>
107#include <algorithm>
108#include <functional>
109#include <iostream>
110#include <iterator>
111#include <limits>
112#include <type_traits>
113#include <new>
114#include <ostream>
115#include <string>
116#include <utility>
117
118#ifndef NDEBUG
119#define NDEBUG 1
120#endif
121
122namespace btree {
123
124// Inside a btree method, if we just call swap(), it will choose the
125// btree::swap method, which we don't want. And we can't say ::swap
126// because then MSVC won't pickup any std::swap() implementations. We
127// can't just use std::swap() directly because then we don't get the
128// specialization for types outside the std namespace. So the solution
129// is to have a special swap helper function whose name doesn't
130// collide with other swap functions defined by the btree classes.
131template <typename T>
132inline void btree_swap_helper(T &a, T &b) {
133 using std::swap;
134 swap(a, b);
135}
136
137// A template helper used to select A or B based on a condition.
138template<bool cond, typename A, typename B>
139struct if_{
140 typedef A type;
141};
142
143template<typename A, typename B>
144struct if_<false, A, B> {
145 typedef B type;
146};
147
148// Types small_ and big_ are a promise that sizeof(small_) < sizeof(big_)
149typedef char small_;
150
151struct big_ {
152 char dummy[2];
153};
154
155// A compile-time assertion.
156template <bool>
157struct CompileAssert {
158};
159
160#define COMPILE_ASSERT(expr, msg) \
161 typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
162
163// A helper type used to indicate that a key-compare-to functor has been
164// provided. A user can specify a key-compare-to functor by doing:
165//
166// struct MyStringComparer
167// : public btree::btree_key_compare_to_tag {
168// int operator()(const string &a, const string &b) const {
169// return a.compare(b);
170// }
171// };
172//
173// Note that the return type is an int and not a bool. There is a
174// COMPILE_ASSERT which enforces this return type.
175struct btree_key_compare_to_tag {
176};
177
178// A helper class that indicates if the Compare parameter is derived from
179// btree_key_compare_to_tag.
180template <typename Compare>
181struct btree_is_key_compare_to
182 : public std::is_convertible<Compare, btree_key_compare_to_tag> {
183};
184
185// A helper class to convert a boolean comparison into a three-way
186// "compare-to" comparison that returns a negative value to indicate
187// less-than, zero to indicate equality and a positive value to
188// indicate greater-than. This helper class is specialized for
189// less<string> and greater<string>. The btree_key_compare_to_adapter
190// class is provided so that btree users automatically get the more
191// efficient compare-to code when using common google string types
192// with common comparison functors.
193template <typename Compare>
194struct btree_key_compare_to_adapter : Compare {
195 btree_key_compare_to_adapter() { }
196 btree_key_compare_to_adapter(const Compare &c) : Compare(c) { }
197 btree_key_compare_to_adapter(const btree_key_compare_to_adapter<Compare> &c)
198 : Compare(c) {
199 }
200};
201
202template <>
203struct btree_key_compare_to_adapter<std::less<std::string> >
204 : public btree_key_compare_to_tag {
205 btree_key_compare_to_adapter() {}
206 btree_key_compare_to_adapter(const std::less<std::string>&) {}
207 btree_key_compare_to_adapter(
208 const btree_key_compare_to_adapter<std::less<std::string> >&) {}
209 int operator()(const std::string &a, const std::string &b) const {
210 return a.compare(b);
211 }
212};
213
214template <>
215struct btree_key_compare_to_adapter<std::greater<std::string> >
216 : public btree_key_compare_to_tag {
217 btree_key_compare_to_adapter() {}
218 btree_key_compare_to_adapter(const std::greater<std::string>&) {}
219 btree_key_compare_to_adapter(
220 const btree_key_compare_to_adapter<std::greater<std::string> >&) {}
221 int operator()(const std::string &a, const std::string &b) const {
222 return b.compare(a);
223 }
224};
225
226// A helper class that allows a compare-to functor to behave like a plain
227// compare functor. This specialization is used when we do not have a
228// compare-to functor.
229template <typename Key, typename Compare, bool HaveCompareTo>
230struct btree_key_comparer {
231 btree_key_comparer() {}
232 btree_key_comparer(Compare c) : comp(c) {}
233 static bool bool_compare(const Compare &comp, const Key &x, const Key &y) {
234 return comp(x, y);
235 }
236 bool operator()(const Key &x, const Key &y) const {
237 return bool_compare(comp, x, y);
238 }
239 Compare comp;
240};
241
242// A specialization of btree_key_comparer when a compare-to functor is
243// present. We need a plain (boolean) comparison in some parts of the btree
244// code, such as insert-with-hint.
245template <typename Key, typename Compare>
246struct btree_key_comparer<Key, Compare, true> {
247 btree_key_comparer() {}
248 btree_key_comparer(Compare c) : comp(c) {}
249 static bool bool_compare(const Compare &comp, const Key &x, const Key &y) {
250 return comp(x, y) < 0;
251 }
252 bool operator()(const Key &x, const Key &y) const {
253 return bool_compare(comp, x, y);
254 }
255 Compare comp;
256};
257
258// A helper function to compare two keys using the specified compare
259// functor. This dispatches to the appropriate btree_key_comparer comparison,
260// depending on whether we have a compare-to functor or not (which depends on
261// whether Compare is derived from btree_key_compare_to_tag).
262template <typename Key, typename Compare>
263static bool btree_compare_keys(
264 const Compare &comp, const Key &x, const Key &y) {
265 typedef btree_key_comparer<Key, Compare,
266 btree_is_key_compare_to<Compare>::value> key_comparer;
267 return key_comparer::bool_compare(comp, x, y);
268}
269
270template <typename Key, typename Compare,
271 typename Alloc, int TargetNodeSize, int ValueSize>
272struct btree_common_params {
273 // If Compare is derived from btree_key_compare_to_tag then use it as the
274 // key_compare type. Otherwise, use btree_key_compare_to_adapter<> which will
275 // fall-back to Compare if we don't have an appropriate specialization.
276 typedef typename if_<
277 btree_is_key_compare_to<Compare>::value,
278 Compare, btree_key_compare_to_adapter<Compare> >::type key_compare;
279 // A type which indicates if we have a key-compare-to functor or a plain old
280 // key-compare functor.
281 typedef btree_is_key_compare_to<key_compare> is_key_compare_to;
282
283 typedef Alloc allocator_type;
284 typedef Key key_type;
285 typedef ssize_t size_type;
286 typedef ptrdiff_t difference_type;
287
288 enum {
289 kTargetNodeSize = TargetNodeSize,
290
291 // Available space for values. This is largest for leaf nodes,
292 // which has overhead no fewer than two pointers.
293 kNodeValueSpace = TargetNodeSize - 2 * sizeof(void*),
294 };
295
296 // This is an integral type large enough to hold as many
297 // ValueSize-values as will fit a node of TargetNodeSize bytes.
298 typedef typename if_<
299 (kNodeValueSpace / ValueSize) >= 256,
300 uint16_t,
301 uint8_t>::type node_count_type;
302};
303
304// A parameters structure for holding the type parameters for a btree_map.
305template <typename Key, typename Data, typename Compare,
306 typename Alloc, int TargetNodeSize>
307struct btree_map_params
308 : public btree_common_params<Key, Compare, Alloc, TargetNodeSize,
309 sizeof(Key) + sizeof(Data)> {
310 typedef Data data_type;
311 typedef Data mapped_type;
312 typedef std::pair<const Key, data_type> value_type;
313 typedef std::pair<Key, data_type> mutable_value_type;
314 typedef value_type* pointer;
315 typedef const value_type* const_pointer;
316 typedef value_type& reference;
317 typedef const value_type& const_reference;
318
319 enum {
320 kValueSize = sizeof(Key) + sizeof(data_type),
321 };
322
323 static const Key& key(const value_type &x) { return x.first; }
324 static const Key& key(const mutable_value_type &x) { return x.first; }
325 static void swap(mutable_value_type *a, mutable_value_type *b) {
326 btree_swap_helper(a->first, b->first);
327 btree_swap_helper(a->second, b->second);
328 }
329};
330
331// A parameters structure for holding the type parameters for a btree_set.
332template <typename Key, typename Compare, typename Alloc, int TargetNodeSize>
333struct btree_set_params
334 : public btree_common_params<Key, Compare, Alloc, TargetNodeSize,
335 sizeof(Key)> {
336 typedef std::false_type data_type;
337 typedef std::false_type mapped_type;
338 typedef Key value_type;
339 typedef value_type mutable_value_type;
340 typedef value_type* pointer;
341 typedef const value_type* const_pointer;
342 typedef value_type& reference;
343 typedef const value_type& const_reference;
344
345 enum {
346 kValueSize = sizeof(Key),
347 };
348
349 static const Key& key(const value_type &x) { return x; }
350 static void swap(mutable_value_type *a, mutable_value_type *b) {
351 btree_swap_helper<mutable_value_type>(*a, *b);
352 }
353};
354
// Turns a lower-bound ("less than") compare into an upper-bound compare: the
// adapter applied to (a, b) is true exactly when the wrapped compare applied
// to (b, a) is false.
template <typename Key, typename Compare>
struct btree_upper_bound_adapter : public Compare {
  btree_upper_bound_adapter(Compare c) : Compare(c) {}

  bool operator()(const Key &a, const Key &b) const {
    const Compare &wrapped = *this;
    if (wrapped(b, a)) {
      return false;
    }
    return true;
  }
};
364
// Compare-to counterpart of the upper-bound adapter: invokes the wrapped
// compare-to functor with its two operands exchanged and returns that result
// unchanged.
template <typename Key, typename CompareTo>
struct btree_upper_bound_compare_to_adapter : public CompareTo {
  btree_upper_bound_compare_to_adapter(CompareTo c) : CompareTo(c) {}

  int operator()(const Key &a, const Key &b) const {
    const CompareTo &wrapped = *this;
    return wrapped(b, a);
  }
};
372
373// Dispatch helper class for using linear search with plain compare.
374template <typename K, typename N, typename Compare>
375struct btree_linear_search_plain_compare {
376 static int lower_bound(const K &k, const N &n, Compare comp) {
377 return n.linear_search_plain_compare(k, 0, n.count(), comp);
378 }
379 static int upper_bound(const K &k, const N &n, Compare comp) {
380 typedef btree_upper_bound_adapter<K, Compare> upper_compare;
381 return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp));
382 }
383};
384
385// Dispatch helper class for using linear search with compare-to
386template <typename K, typename N, typename CompareTo>
387struct btree_linear_search_compare_to {
388 static int lower_bound(const K &k, const N &n, CompareTo comp) {
389 return n.linear_search_compare_to(k, 0, n.count(), comp);
390 }
391 static int upper_bound(const K &k, const N &n, CompareTo comp) {
392 typedef btree_upper_bound_adapter<K,
393 btree_key_comparer<K, CompareTo, true> > upper_compare;
394 return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp));
395 }
396};
397
398// Dispatch helper class for using binary search with plain compare.
399template <typename K, typename N, typename Compare>
400struct btree_binary_search_plain_compare {
401 static int lower_bound(const K &k, const N &n, Compare comp) {
402 return n.binary_search_plain_compare(k, 0, n.count(), comp);
403 }
404 static int upper_bound(const K &k, const N &n, Compare comp) {
405 typedef btree_upper_bound_adapter<K, Compare> upper_compare;
406 return n.binary_search_plain_compare(k, 0, n.count(), upper_compare(comp));
407 }
408};
409
410// Dispatch helper class for using binary search with compare-to.
411template <typename K, typename N, typename CompareTo>
412struct btree_binary_search_compare_to {
413 static int lower_bound(const K &k, const N &n, CompareTo comp) {
414 return n.binary_search_compare_to(k, 0, n.count(), CompareTo());
415 }
416 static int upper_bound(const K &k, const N &n, CompareTo comp) {
417 typedef btree_upper_bound_adapter<K,
418 btree_key_comparer<K, CompareTo, true> > upper_compare;
419 return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp));
420 }
421};
422
423// A node in the btree holding. The same node type is used for both internal
424// and leaf nodes in the btree, though the nodes are allocated in such a way
425// that the children array is only valid in internal nodes.
426template <typename Params>
427class btree_node {
428 public:
429 typedef Params params_type;
430 typedef btree_node<Params> self_type;
431 typedef typename Params::key_type key_type;
432 typedef typename Params::data_type data_type;
433 typedef typename Params::value_type value_type;
434 typedef typename Params::mutable_value_type mutable_value_type;
435 typedef typename Params::pointer pointer;
436 typedef typename Params::const_pointer const_pointer;
437 typedef typename Params::reference reference;
438 typedef typename Params::const_reference const_reference;
439 typedef typename Params::key_compare key_compare;
440 typedef typename Params::size_type size_type;
441 typedef typename Params::difference_type difference_type;
442 // Typedefs for the various types of node searches.
443 typedef btree_linear_search_plain_compare<
444 key_type, self_type, key_compare> linear_search_plain_compare_type;
445 typedef btree_linear_search_compare_to<
446 key_type, self_type, key_compare> linear_search_compare_to_type;
447 typedef btree_binary_search_plain_compare<
448 key_type, self_type, key_compare> binary_search_plain_compare_type;
449 typedef btree_binary_search_compare_to<
450 key_type, self_type, key_compare> binary_search_compare_to_type;
451 // If we have a valid key-compare-to type, use linear_search_compare_to,
452 // otherwise use linear_search_plain_compare.
453 typedef typename if_<
454 Params::is_key_compare_to::value,
455 linear_search_compare_to_type,
456 linear_search_plain_compare_type>::type linear_search_type;
457 // If we have a valid key-compare-to type, use binary_search_compare_to,
458 // otherwise use binary_search_plain_compare.
459 typedef typename if_<
460 Params::is_key_compare_to::value,
461 binary_search_compare_to_type,
462 binary_search_plain_compare_type>::type binary_search_type;
463 // If the key is an integral or floating point type, use linear search which
464 // is faster than binary search for such types. Might be wise to also
465 // configure linear search based on node-size.
466 typedef typename if_<
467 std::is_integral<key_type>::value ||
468 std::is_floating_point<key_type>::value,
469 linear_search_type, binary_search_type>::type search_type;
470
471 struct base_fields {
472 typedef typename Params::node_count_type field_type;
473
474 // A boolean indicating whether the node is a leaf or not.
475 bool leaf;
476 // The position of the node in the node's parent.
477 field_type position;
478 // The maximum number of values the node can hold.
479 field_type max_count;
480 // The count of the number of values in the node.
481 field_type count;
482 // A pointer to the node's parent.
483 btree_node *parent;
484 };
485
486 enum {
487 kValueSize = params_type::kValueSize,
488 kTargetNodeSize = params_type::kTargetNodeSize,
489
490 // Compute how many values we can fit onto a leaf node.
491 kNodeTargetValues = (kTargetNodeSize - sizeof(base_fields)) / kValueSize,
492 // We need a minimum of 3 values per internal node in order to perform
493 // splitting (1 value for the two nodes involved in the split and 1 value
494 // propagated to the parent as the delimiter for the split).
495 kNodeValues = kNodeTargetValues >= 3 ? kNodeTargetValues : 3,
496
497 kExactMatch = 1 << 30,
498 kMatchMask = kExactMatch - 1,
499 };
500
501 struct leaf_fields : public base_fields {
502 // The array of values. Only the first count of these values have been
503 // constructed and are valid.
504 mutable_value_type values[kNodeValues];
505 };
506
507 struct internal_fields : public leaf_fields {
508 // The array of child pointers. The keys in children_[i] are all less than
509 // key(i). The keys in children_[i + 1] are all greater than key(i). There
510 // are always count + 1 children.
511 btree_node *children[kNodeValues + 1];
512 };
513
514 struct root_fields : public internal_fields {
515 btree_node *rightmost;
516 size_type size;
517 };
518
519 public:
520 // Getter/setter for whether this is a leaf node or not. This value doesn't
521 // change after the node is created.
522 bool leaf() const { return fields_.leaf; }
523
524 // Getter for the position of this node in its parent.
525 int position() const { return fields_.position; }
526 void set_position(int v) { fields_.position = v; }
527
528 // Getter/setter for the number of values stored in this node.
529 int count() const { return fields_.count; }
530 void set_count(int v) { fields_.count = v; }
531 int max_count() const { return fields_.max_count; }
532
533 // Getter for the parent of this node.
534 btree_node* parent() const { return fields_.parent; }
535 // Getter for whether the node is the root of the tree. The parent of the
536 // root of the tree is the leftmost node in the tree which is guaranteed to
537 // be a leaf.
538 bool is_root() const { return parent()->leaf(); }
539 void make_root() {
540 assert(parent()->is_root());
541 fields_.parent = fields_.parent->parent();
542 }
543
544 // Getter for the rightmost root node field. Only valid on the root node.
545 btree_node* rightmost() const { return fields_.rightmost; }
546 btree_node** mutable_rightmost() { return &fields_.rightmost; }
547
548 // Getter for the size root node field. Only valid on the root node.
549 size_type size() const { return fields_.size; }
550 size_type* mutable_size() { return &fields_.size; }
551
552 // Getters for the key/value at position i in the node.
553 const key_type& key(int i) const {
554 return params_type::key(fields_.values[i]);
555 }
556 reference value(int i) {
557 return reinterpret_cast<reference>(fields_.values[i]);
558 }
559 const_reference value(int i) const {
560 return reinterpret_cast<const_reference>(fields_.values[i]);
561 }
562 mutable_value_type* mutable_value(int i) {
563 return &fields_.values[i];
564 }
565
566 // Swap value i in this node with value j in node x.
567 void value_swap(int i, btree_node *x, int j) {
568 params_type::swap(mutable_value(i), x->mutable_value(j));
569 }
570
571 // Getters/setter for the child at position i in the node.
572 btree_node* child(int i) const { return fields_.children[i]; }
573 btree_node** mutable_child(int i) { return &fields_.children[i]; }
574 void set_child(int i, btree_node *c) {
575 *mutable_child(i) = c;
576 c->fields_.parent = this;
577 c->fields_.position = i;
578 }
579
580 // Returns the position of the first value whose key is not less than k.
581 template <typename Compare>
582 int lower_bound(const key_type &k, const Compare &comp) const {
583 return search_type::lower_bound(k, *this, comp);
584 }
585 // Returns the position of the first value whose key is greater than k.
586 template <typename Compare>
587 int upper_bound(const key_type &k, const Compare &comp) const {
588 return search_type::upper_bound(k, *this, comp);
589 }
590
591 // Returns the position of the first value whose key is not less than k using
592 // linear search performed using plain compare.
593 template <typename Compare>
594 int linear_search_plain_compare(
595 const key_type &k, int s, int e, const Compare &comp) const {
596 while (s < e) {
597 if (!btree_compare_keys(comp, key(s), k)) {
598 break;
599 }
600 ++s;
601 }
602 return s;
603 }
604
605 // Returns the position of the first value whose key is not less than k using
606 // linear search performed using compare-to.
607 template <typename Compare>
608 int linear_search_compare_to(
609 const key_type &k, int s, int e, const Compare &comp) const {
610 while (s < e) {
611 int c = comp(key(s), k);
612 if (c == 0) {
613 return s | kExactMatch;
614 } else if (c > 0) {
615 break;
616 }
617 ++s;
618 }
619 return s;
620 }
621
622 // Returns the position of the first value whose key is not less than k using
623 // binary search performed using plain compare.
624 template <typename Compare>
625 int binary_search_plain_compare(
626 const key_type &k, int s, int e, const Compare &comp) const {
627 while (s != e) {
628 int mid = (s + e) / 2;
629 if (btree_compare_keys(comp, key(mid), k)) {
630 s = mid + 1;
631 } else {
632 e = mid;
633 }
634 }
635 return s;
636 }
637
638 // Returns the position of the first value whose key is not less than k using
639 // binary search performed using compare-to.
640 template <typename CompareTo>
641 int binary_search_compare_to(
642 const key_type &k, int s, int e, const CompareTo &comp) const {
643 while (s != e) {
644 int mid = (s + e) / 2;
645 int c = comp(key(mid), k);
646 if (c < 0) {
647 s = mid + 1;
648 } else if (c > 0) {
649 e = mid;
650 } else {
651 // Need to return the first value whose key is not less than k, which
652 // requires continuing the binary search. Note that we are guaranteed
653 // that the result is an exact match because if "key(mid-1) < k" the
654 // call to binary_search_compare_to() will return "mid".
655 s = binary_search_compare_to(k, s, mid, comp);
656 return s | kExactMatch;
657 }
658 }
659 return s;
660 }
661
662 // Inserts the value x at position i, shifting all existing values and
663 // children at positions >= i to the right by 1.
664 void insert_value(int i, const value_type &x);
665
666 // Removes the value at position i, shifting all existing values and children
667 // at positions > i to the left by 1.
668 void remove_value(int i);
669
670 // Rebalances a node with its right sibling.
671 void rebalance_right_to_left(btree_node *sibling, int to_move);
672 void rebalance_left_to_right(btree_node *sibling, int to_move);
673
674 // Splits a node, moving a portion of the node's values to its right sibling.
675 void split(btree_node *sibling, int insert_position);
676
677 // Merges a node with its right sibling, moving all of the values and the
678 // delimiting key in the parent node onto itself.
679 void merge(btree_node *sibling);
680
681 // Swap the contents of "this" and "src".
682 void swap(btree_node *src);
683
684 // Node allocation/deletion routines.
685 static btree_node* init_leaf(
686 leaf_fields *f, btree_node *parent, int max_count) {
687 btree_node *n = reinterpret_cast<btree_node*>(f);
688 f->leaf = 1;
689 f->position = 0;
690 f->max_count = max_count;
691 f->count = 0;
692 f->parent = parent;
693 if (!NDEBUG) {
694 memset(&f->values, 0, max_count * sizeof(value_type));
695 }
696 return n;
697 }
698 static btree_node* init_internal(internal_fields *f, btree_node *parent) {
699 btree_node *n = init_leaf(f, parent, kNodeValues);
700 f->leaf = 0;
701 if (!NDEBUG) {
702 memset(f->children, 0, sizeof(f->children));
703 }
704 return n;
705 }
706 static btree_node* init_root(root_fields *f, btree_node *parent) {
707 btree_node *n = init_internal(f, parent);
708 f->rightmost = parent;
709 f->size = parent->count();
710 return n;
711 }
712 void destroy() {
713 for (int i = 0; i < count(); ++i) {
714 value_destroy(i);
715 }
716 }
717
718 private:
719 void value_init(int i) {
720 new (&fields_.values[i]) mutable_value_type;
721 }
722 void value_init(int i, const value_type &x) {
723 new (&fields_.values[i]) mutable_value_type(x);
724 }
725 void value_destroy(int i) {
726 fields_.values[i].~mutable_value_type();
727 }
728
729 private:
730 root_fields fields_;
731
732 private:
733 btree_node(const btree_node&);
734 void operator=(const btree_node&);
735};
736
// A bidirectional iterator over the values of a btree. It is a (node,
// position) pair; Reference and Pointer select between the mutable and the
// const flavour.
template <typename Node, typename Reference, typename Pointer>
struct btree_iterator {
  typedef Node node_type;
  typedef typename Node::key_type key_type;
  typedef typename Node::size_type size_type;
  typedef typename Node::difference_type difference_type;
  typedef typename Node::params_type params_type;

  typedef typename std::remove_const<Node>::type normal_node;
  typedef const Node const_node;
  typedef typename params_type::value_type value_type;
  typedef typename params_type::pointer normal_pointer;
  typedef typename params_type::reference normal_reference;
  typedef typename params_type::const_pointer const_pointer;
  typedef typename params_type::const_reference const_reference;

  typedef Pointer pointer;
  typedef Reference reference;
  typedef std::bidirectional_iterator_tag iterator_category;

  typedef btree_iterator<Node, Reference, Pointer> self_type;
  typedef btree_iterator<
    normal_node, normal_reference, normal_pointer> iterator;
  typedef btree_iterator<
    const_node, const_reference, const_pointer> const_iterator;

  // A default-constructed iterator has a NULL node and position -1.
  btree_iterator()
      : node(NULL),
        position(-1) {
  }
  btree_iterator(Node *n, int p)
      : node(n),
        position(p) {
  }
  // Construction from the mutable iterator type.
  btree_iterator(const iterator &x)
      : node(x.node),
        position(x.position) {
  }

  // Advances the iterator. Stepping to another value of the same leaf is
  // handled here; every other case is left to increment_slow().
  void increment() {
    const bool stayed_in_leaf = node->leaf() && ++position < node->count();
    if (!stayed_in_leaf) {
      increment_slow();
    }
  }
  void increment_by(int count);
  void increment_slow();

  // Moves the iterator back. Stepping to another value of the same leaf is
  // handled here; every other case is left to decrement_slow().
  void decrement() {
    const bool stayed_in_leaf = node->leaf() && --position >= 0;
    if (!stayed_in_leaf) {
      decrement_slow();
    }
  }
  void decrement_slow();

  // Two iterators are equal when both node and position agree.
  bool operator==(const const_iterator &x) const {
    return node == x.node && position == x.position;
  }
  bool operator!=(const const_iterator &x) const {
    return !(node == x.node && position == x.position);
  }

  // Access to the key/value the iterator is pointing at.
  const key_type& key() const {
    return node->key(position);
  }
  reference operator*() const {
    return node->value(position);
  }
  pointer operator->() const {
    return &node->value(position);
  }

  // Pre-increment / pre-decrement.
  self_type& operator++() {
    increment();
    return *this;
  }
  self_type& operator--() {
    decrement();
    return *this;
  }
  // Post-increment / post-decrement: return the iterator as it was before
  // the step.
  self_type operator++(int) {
    self_type before = *this;
    increment();
    return before;
  }
  self_type operator--(int) {
    self_type before = *this;
    decrement();
    return before;
  }

  // The node in the tree the iterator is pointing at.
  Node *node;
  // The position within that node the iterator is pointing at.
  int position;
};
836
// Dispatch helper that routes btree::internal_locate to the tree's
// plain-compare implementation.
struct btree_internal_locate_plain_compare {
  template <typename K, typename T, typename Iter>
  static std::pair<Iter, int> dispatch(const K &k, const T &t, Iter iter) {
    std::pair<Iter, int> located = t.internal_locate_plain_compare(k, iter);
    return located;
  }
};
844
// Dispatch helper that routes btree::internal_locate to the tree's
// compare-to implementation.
struct btree_internal_locate_compare_to {
  template <typename K, typename T, typename Iter>
  static std::pair<Iter, int> dispatch(const K &k, const T &t, Iter iter) {
    std::pair<Iter, int> located = t.internal_locate_compare_to(k, iter);
    return located;
  }
};
852
853template <typename Params>
854class btree : public Params::key_compare {
855 typedef btree<Params> self_type;
856 typedef btree_node<Params> node_type;
857 typedef typename node_type::base_fields base_fields;
858 typedef typename node_type::leaf_fields leaf_fields;
859 typedef typename node_type::internal_fields internal_fields;
860 typedef typename node_type::root_fields root_fields;
861 typedef typename Params::is_key_compare_to is_key_compare_to;
862
863 friend struct btree_internal_locate_plain_compare;
864 friend struct btree_internal_locate_compare_to;
865 typedef typename if_<
866 is_key_compare_to::value,
867 btree_internal_locate_compare_to,
868 btree_internal_locate_plain_compare>::type internal_locate_type;
869
870 enum {
871 kNodeValues = node_type::kNodeValues,
872 kMinNodeValues = kNodeValues / 2,
873 kValueSize = node_type::kValueSize,
874 kExactMatch = node_type::kExactMatch,
875 kMatchMask = node_type::kMatchMask,
876 };
877
// Bundles a Data member with a Base that is inherited from rather than stored
// as a member, so that a 0-size Base (e.g. an empty allocator, as in
// empty_base_handle<internal_allocator_type, node_type*>) can benefit from
// the empty base class optimization: the compiler doesn't have to reserve any
// space for it and sizeof(empty_base_handle) will simply be sizeof(Data).
template <typename Base, typename Data>
struct empty_base_handle : public Base {
  Data data;

  empty_base_handle(const Base &b, const Data &d)
      : Base(b),
        data(d) {
  }
};
892
// A pair of counters: number of leaf nodes and number of internal nodes.
struct node_stats {
  ssize_t leaf_nodes;
  ssize_t internal_nodes;

  node_stats(ssize_t l, ssize_t i)
      : leaf_nodes(l),
        internal_nodes(i) {
  }

  // Adds both counters of x to the corresponding counters of this object.
  node_stats& operator+=(const node_stats &x) {
    leaf_nodes = leaf_nodes + x.leaf_nodes;
    internal_nodes = internal_nodes + x.internal_nodes;
    return *this;
  }
};
908
909 public:
910 typedef Params params_type;
911 typedef typename Params::key_type key_type;
912 typedef typename Params::data_type data_type;
913 typedef typename Params::mapped_type mapped_type;
914 typedef typename Params::value_type value_type;
915 typedef typename Params::key_compare key_compare;
916 typedef typename Params::pointer pointer;
917 typedef typename Params::const_pointer const_pointer;
918 typedef typename Params::reference reference;
919 typedef typename Params::const_reference const_reference;
920 typedef typename Params::size_type size_type;
921 typedef typename Params::difference_type difference_type;
922 typedef btree_iterator<node_type, reference, pointer> iterator;
923 typedef typename iterator::const_iterator const_iterator;
924 typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
925 typedef std::reverse_iterator<iterator> reverse_iterator;
926
927 typedef typename Params::allocator_type allocator_type;
928 typedef typename allocator_type::template rebind<char>::other
929 internal_allocator_type;
930
931 public:
932 // Default constructor.
933 btree(const key_compare &comp, const allocator_type &alloc);
934
935 // Copy constructor.
936 btree(const self_type &x);
937
938 // Destructor.
939 ~btree() {
940 clear();
941 }
942
943 // Iterator routines.
944 iterator begin() {
945 return iterator(leftmost(), 0);
946 }
947 const_iterator begin() const {
948 return const_iterator(leftmost(), 0);
949 }
950 iterator end() {
951 return iterator(rightmost(), rightmost() ? rightmost()->count() : 0);
952 }
953 const_iterator end() const {
954 return const_iterator(rightmost(), rightmost() ? rightmost()->count() : 0);
955 }
956 reverse_iterator rbegin() {
957 return reverse_iterator(end());
958 }
959 const_reverse_iterator rbegin() const {
960 return const_reverse_iterator(end());
961 }
962 reverse_iterator rend() {
963 return reverse_iterator(begin());
964 }
965 const_reverse_iterator rend() const {
966 return const_reverse_iterator(begin());
967 }
968
969 // Finds the first element whose key is not less than key.
970 iterator lower_bound(const key_type &key) {
971 return internal_end(
972 internal_lower_bound(key, iterator(root(), 0)));
973 }
974 const_iterator lower_bound(const key_type &key) const {
975 return internal_end(
976 internal_lower_bound(key, const_iterator(root(), 0)));
977 }
978
979 // Finds the first element whose key is greater than key.
980 iterator upper_bound(const key_type &key) {
981 return internal_end(
982 internal_upper_bound(key, iterator(root(), 0)));
983 }
984 const_iterator upper_bound(const key_type &key) const {
985 return internal_end(
986 internal_upper_bound(key, const_iterator(root(), 0)));
987 }
988
989 // Finds the range of values which compare equal to key. The first member of
990 // the returned pair is equal to lower_bound(key). The second member pair of
991 // the pair is equal to upper_bound(key).
992 std::pair<iterator,iterator> equal_range(const key_type &key) {
993 return std::make_pair(lower_bound(key), upper_bound(key));
994 }
995 std::pair<const_iterator,const_iterator> equal_range(const key_type &key) const {
996 return std::make_pair(lower_bound(key), upper_bound(key));
997 }
998
999 // Inserts a value into the btree only if it does not already exist. The
1000 // boolean return value indicates whether insertion succeeded or failed. The
1001 // ValuePointer type is used to avoid instantiating the value unless the key
1002 // is being inserted. Value is not dereferenced if the key already exists in
1003 // the btree. See btree_map::operator[].
1004 template <typename ValuePointer>
1005 std::pair<iterator,bool> insert_unique(const key_type &key, ValuePointer value);
1006
1007 // Inserts a value into the btree only if it does not already exist. The
1008 // boolean return value indicates whether insertion succeeded or failed.
1009 std::pair<iterator,bool> insert_unique(const value_type &v) {
1010 return insert_unique(params_type::key(v), &v);
1011 }
1012
1013 // Insert with hint. Check to see if the value should be placed immediately
1014 // before position in the tree. If it does, then the insertion will take
1015 // amortized constant time. If not, the insertion will take amortized
1016 // logarithmic time as if a call to insert_unique(v) were made.
1017 iterator insert_unique(iterator position, const value_type &v);
1018
1019 // Insert a range of values into the btree.
1020 template <typename InputIterator>
1021 void insert_unique(InputIterator b, InputIterator e);
1022
1023 // Inserts a value into the btree. The ValuePointer type is used to avoid
1024 // instantiating the value unless the key is being inserted. Value is not
1025 // dereferenced if the key already exists in the btree. See
1026 // btree_map::operator[].
1027 template <typename ValuePointer>
1028 iterator insert_multi(const key_type &key, ValuePointer value);
1029
1030 // Inserts a value into the btree.
1031 iterator insert_multi(const value_type &v) {
1032 return insert_multi(params_type::key(v), &v);
1033 }
1034
1035 // Insert with hint. Check to see if the value should be placed immediately
1036 // before position in the tree. If it does, then the insertion will take
1037 // amortized constant time. If not, the insertion will take amortized
1038 // logarithmic time as if a call to insert_multi(v) were made.
1039 iterator insert_multi(iterator position, const value_type &v);
1040
1041 // Insert a range of values into the btree.
1042 template <typename InputIterator>
1043 void insert_multi(InputIterator b, InputIterator e);
1044
1045 void assign(const self_type &x);
1046
1047 // Erase the specified iterator from the btree. The iterator must be valid
1048 // (i.e. not equal to end()). Return an iterator pointing to the node after
1049 // the one that was erased (or end() if none exists).
1050 iterator erase(iterator iter);
1051
1052 // Erases range. Returns the number of keys erased.
1053 int erase(iterator begin, iterator end);
1054
1055 // Erases the specified key from the btree. Returns 1 if an element was
1056 // erased and 0 otherwise.
1057 int erase_unique(const key_type &key);
1058
1059 // Erases all of the entries matching the specified key from the
1060 // btree. Returns the number of elements erased.
1061 int erase_multi(const key_type &key);
1062
1063 // Finds the iterator corresponding to a key or returns end() if the key is
1064 // not present.
1065 iterator find_unique(const key_type &key) {
1066 return internal_end(
1067 internal_find_unique(key, iterator(root(), 0)));
1068 }
1069 const_iterator find_unique(const key_type &key) const {
1070 return internal_end(
1071 internal_find_unique(key, const_iterator(root(), 0)));
1072 }
1073 iterator find_multi(const key_type &key) {
1074 return internal_end(
1075 internal_find_multi(key, iterator(root(), 0)));
1076 }
1077 const_iterator find_multi(const key_type &key) const {
1078 return internal_end(
1079 internal_find_multi(key, const_iterator(root(), 0)));
1080 }
1081
1082 // Returns a count of the number of times the key appears in the btree.
1083 size_type count_unique(const key_type &key) const {
1084 const_iterator b = internal_find_unique(
1085 key, const_iterator(root(), 0));
1086 if (!b.node) {
1087 // The key doesn't exist in the tree.
1088 return 0;
1089 }
1090 return 1;
1091 }
1092 // Returns a count of the number of times the key appears in the btree.
1093 size_type count_multi(const key_type &key) const {
1094 return distance(lower_bound(key), upper_bound(key));
1095 }
1096
1097 // Clear the btree, deleting all of the values it contains.
1098 void clear();
1099
1100 // Swap the contents of *this and x.
1101 void swap(self_type &x);
1102
1103 // Assign the contents of x to *this.
1104 self_type& operator=(const self_type &x) {
1105 if (&x == this) {
1106 // Don't copy onto ourselves.
1107 return *this;
1108 }
1109 assign(x);
1110 return *this;
1111 }
1112
1113 key_compare* mutable_key_comp() {
1114 return this;
1115 }
1116 const key_compare& key_comp() const {
1117 return *this;
1118 }
1119 bool compare_keys(const key_type &x, const key_type &y) const {
1120 return btree_compare_keys(key_comp(), x, y);
1121 }
1122
1123 // Dump the btree to the specified ostream. Requires that operator<< is
1124 // defined for Key and Value.
1125 void dump(std::ostream &os) const {
1126 if (root() != NULL) {
1127 internal_dump(os, root(), 0);
1128 }
1129 }
1130
1131 // Verifies the structure of the btree.
1132 void verify() const;
1133
1134 // Size routines. Note that empty() is slightly faster than doing size()==0.
1135 size_type size() const {
1136 if (empty()) return 0;
1137 if (root()->leaf()) return root()->count();
1138 return root()->size();
1139 }
1140 size_type max_size() const { return std::numeric_limits<size_type>::max(); }
1141 bool empty() const { return root() == NULL; }
1142
1143 // The height of the btree. An empty tree will have height 0.
1144 size_type height() const {
1145 size_type h = 0;
1146 if (root()) {
1147 // Count the length of the chain from the leftmost node up to the
1148 // root. We actually count from the root back around to the level below
1149 // the root, but the calculation is the same because of the circularity
1150 // of that traversal.
1151 const node_type *n = root();
1152 do {
1153 ++h;
1154 n = n->parent();
1155 } while (n != root());
1156 }
1157 return h;
1158 }
1159
1160 // The number of internal, leaf and total nodes used by the btree.
1161 size_type leaf_nodes() const {
1162 return internal_stats(root()).leaf_nodes;
1163 }
1164 size_type internal_nodes() const {
1165 return internal_stats(root()).internal_nodes;
1166 }
1167 size_type nodes() const {
1168 node_stats stats = internal_stats(root());
1169 return stats.leaf_nodes + stats.internal_nodes;
1170 }
1171
1172 // The total number of bytes used by the btree.
1173 size_type bytes_used() const {
1174 node_stats stats = internal_stats(root());
1175 if (stats.leaf_nodes == 1 && stats.internal_nodes == 0) {
1176 return sizeof(*this) +
1177 sizeof(base_fields) + root()->max_count() * sizeof(value_type);
1178 } else {
1179 return sizeof(*this) +
1180 sizeof(root_fields) - sizeof(internal_fields) +
1181 stats.leaf_nodes * sizeof(leaf_fields) +
1182 stats.internal_nodes * sizeof(internal_fields);
1183 }
1184 }
1185
1186 // The average number of bytes used per value stored in the btree.
1187 static double average_bytes_per_value() {
1188 // Returns the number of bytes per value on a leaf node that is 75%
1189 // full. Experimentally, this matches up nicely with the computed number of
1190 // bytes per value in trees that had their values inserted in random order.
1191 return sizeof(leaf_fields) / (kNodeValues * 0.75);
1192 }
1193
1194 // The fullness of the btree. Computed as the number of elements in the btree
1195 // divided by the maximum number of elements a tree with the current number
1196 // of nodes could hold. A value of 1 indicates perfect space
1197 // utilization. Smaller values indicate space wastage.
1198 double fullness() const {
1199 return double(size()) / (nodes() * kNodeValues);
1200 }
1201 // The overhead of the btree structure in bytes per node. Computed as the
1202 // total number of bytes used by the btree minus the number of bytes used for
1203 // storing elements divided by the number of elements.
1204 double overhead() const {
1205 if (empty()) {
1206 return 0.0;
1207 }
1208 return (bytes_used() - size() * kValueSize) / double(size());
1209 }
1210
1211 private:
// Internal accessor routines. The root pointer lives in root_.data; it is
// NULL for an empty tree (see empty()).
node_type* root() { return root_.data; }
const node_type* root() const { return root_.data; }
// Address of the stored root pointer, for callers that replace the root.
node_type** mutable_root() { return &root_.data; }
1216
1217 // The rightmost node is stored in the root node.
1218 node_type* rightmost() {
1219 return (!root() || root()->leaf()) ? root() : root()->rightmost();
1220 }
1221 const node_type* rightmost() const {
1222 return (!root() || root()->leaf()) ? root() : root()->rightmost();
1223 }
1224 node_type** mutable_rightmost() { return root()->mutable_rightmost(); }
1225
1226 // The leftmost node is stored as the parent of the root node.
1227 node_type* leftmost() { return root() ? root()->parent() : NULL; }
1228 const node_type* leftmost() const { return root() ? root()->parent() : NULL; }
1229
// The size of the tree is stored in the root node. Dereferences root()
// unconditionally, so the tree must not be empty.
size_type* mutable_size() { return root()->mutable_size(); }
1232
// Allocator routines. The allocator is obtained by casting the root_ handle
// to internal_allocator_type, i.e. the same member carries both the root
// pointer and the allocator.
internal_allocator_type* mutable_internal_allocator() {
  return static_cast<internal_allocator_type*>(&root_);
}
const internal_allocator_type& internal_allocator() const {
  return *static_cast<const internal_allocator_type*>(&root_);
}
1240
1241 // Node creation/deletion routines.
1242 node_type* new_internal_node(node_type *parent) {
1243 internal_fields *p = reinterpret_cast<internal_fields*>(
1244 mutable_internal_allocator()->allocate(sizeof(internal_fields)));
1245 return node_type::init_internal(p, parent);
1246 }
1247 node_type* new_internal_root_node() {
1248 root_fields *p = reinterpret_cast<root_fields*>(
1249 mutable_internal_allocator()->allocate(sizeof(root_fields)));
1250 return node_type::init_root(p, root()->parent());
1251 }
1252 node_type* new_leaf_node(node_type *parent) {
1253 leaf_fields *p = reinterpret_cast<leaf_fields*>(
1254 mutable_internal_allocator()->allocate(sizeof(leaf_fields)));
1255 return node_type::init_leaf(p, parent, kNodeValues);
1256 }
1257 node_type* new_leaf_root_node(int max_count) {
1258 leaf_fields *p = reinterpret_cast<leaf_fields*>(
1259 mutable_internal_allocator()->allocate(
1260 sizeof(base_fields) + max_count * sizeof(value_type)));
1261 return node_type::init_leaf(p, reinterpret_cast<node_type*>(p), max_count);
1262 }
1263 void delete_internal_node(node_type *node) {
1264 node->destroy();
1265 assert(node != root());
1266 mutable_internal_allocator()->deallocate(
1267 reinterpret_cast<char*>(node), sizeof(internal_fields));
1268 }
1269 void delete_internal_root_node() {
1270 root()->destroy();
1271 mutable_internal_allocator()->deallocate(
1272 reinterpret_cast<char*>(root()), sizeof(root_fields));
1273 }
1274 void delete_leaf_node(node_type *node) {
1275 node->destroy();
1276 mutable_internal_allocator()->deallocate(
1277 reinterpret_cast<char*>(node),
1278 sizeof(base_fields) + node->max_count() * sizeof(value_type));
1279 }
1280
// Rebalances or splits the node iter points to. On return *iter has been
// updated to refer to the node/position where the insertion should happen.
void rebalance_or_split(iterator *iter);

// Merges the values of left, right and the delimiting key on their parent
// onto left, removing the delimiting key and deleting right.
void merge_nodes(node_type *left, node_type *right);

// Tries to merge node with its left or right sibling, and failing that,
// rebalance with its left or right sibling. Returns true if a merge
// occurred, at which point it is no longer valid to access node. Returns
// false if no merging took place.
bool try_merge_or_rebalance(iterator *iter);

// Tries to shrink the height of the tree by 1.
void try_shrink();
1296
1297 iterator internal_end(iterator iter) {
1298 return iter.node ? iter : end();
1299 }
1300 const_iterator internal_end(const_iterator iter) const {
1301 return iter.node ? iter : end();
1302 }
1303
// Inserts a value into the btree immediately before iter. Requires that
// key(v) <= iter.key() and (--iter).key() <= key(v).
iterator internal_insert(iterator iter, const value_type &v);

// Returns an iterator pointing to the first value >= the value "iter" is
// pointing at. Note that "iter" might be pointing to an invalid location as
// iter.position == iter.node->count(). This routine simply moves iter up in
// the tree to a valid location.
template <typename IterType>
static IterType internal_last(IterType iter);

// Returns an iterator pointing to the leaf position at which key would
// reside in the tree. We provide 2 versions of internal_locate. The first
// version (internal_locate_plain_compare) always returns 0 for the second
// field of the pair. The second version (internal_locate_compare_to) is for
// the key-compare-to specialization and returns either kExactMatch (if the
// key was found in the tree) or -kExactMatch (if it wasn't) in the second
// field of the pair. The compare_to specialization allows the caller to
// avoid a subsequent comparison to determine if an exact match was made,
// speeding up string keys.
template <typename IterType>
std::pair<IterType, int> internal_locate(
    const key_type &key, IterType iter) const;
template <typename IterType>
std::pair<IterType, int> internal_locate_plain_compare(
    const key_type &key, IterType iter) const;
template <typename IterType>
std::pair<IterType, int> internal_locate_compare_to(
    const key_type &key, IterType iter) const;

// Internal routine which implements lower_bound().
template <typename IterType>
IterType internal_lower_bound(
    const key_type &key, IterType iter) const;

// Internal routine which implements upper_bound().
template <typename IterType>
IterType internal_upper_bound(
    const key_type &key, IterType iter) const;

// Internal routine which implements find_unique(). Callers in this class
// treat a result with a null node as "key not found".
template <typename IterType>
IterType internal_find_unique(
    const key_type &key, IterType iter) const;

// Internal routine which implements find_multi(). Callers in this class
// treat a result with a null node as "key not found".
template <typename IterType>
IterType internal_find_multi(
    const key_type &key, IterType iter) const;

// Deletes a node and all of its children.
void internal_clear(node_type *node);

// Dumps a node and all of its children to the specified ostream.
void internal_dump(std::ostream &os, const node_type *node, int level) const;

// Verifies the tree structure of node. The returned count is compared
// against size() by verify().
int internal_verify(const node_type *node,
                    const key_type *lo, const key_type *hi) const;
1363
1364 node_stats internal_stats(const node_type *node) const {
1365 if (!node) {
1366 return node_stats(0, 0);
1367 }
1368 if (node->leaf()) {
1369 return node_stats(1, 0);
1370 }
1371 node_stats res(0, 1);
1372 for (int i = 0; i <= node->count(); ++i) {
1373 res += internal_stats(node->child(i));
1374 }
1375 return res;
1376 }
1377
1378 private:
// Holds the root node pointer in its `data` field (see root()); the same
// object is also cast to internal_allocator_type by the allocator accessors.
empty_base_handle<internal_allocator_type, node_type*> root_;
1380
1381 private:
// A never instantiated helper function whose return type is big_ when R is
// the expected comparison result type and small_ otherwise. The expected
// type is int for a key-compare-to functor and bool for a plain comparator.
template <typename R>
static typename if_<
    if_<is_key_compare_to::value,
        std::is_same<R, int>,
        std::is_same<R, bool> >::type::value,
    big_, small_>::type key_compare_checker(R);

// A never instantiated helper function that returns the key comparison
// functor.
static key_compare key_compare_helper();

// Verify that key_compare returns the expected result type (bool, or int
// for a key-compare-to functor). This is similar to the way is_convertible
// in base/type_traits.h works. Note that key_compare_checker is never
// actually invoked. The compiler will select which key_compare_checker() to
// instantiate and then figure out the size of the return type of
// key_compare_checker() at compile time which we then check against the
// sizeof of big_.
COMPILE_ASSERT(
    sizeof(key_compare_checker(key_compare_helper()(key_type(), key_type()))) ==
    sizeof(big_),
    key_comparison_function_must_return_bool);

// Note: We insist that kNodeValues, which is computed from
// Params::kTargetNodeSize, fits in base_fields::field_type.
// NOTE(review): the shift is done in int, so this bound is only meaningful
// while field_type is narrower than int -- confirm field_type's definition.
COMPILE_ASSERT(kNodeValues <
               (1 << (8 * sizeof(typename base_fields::field_type))),
               target_node_size_too_large);

// Test the assumption made in setting kNodeValueSpace.
COMPILE_ASSERT(sizeof(base_fields) >= 2 * sizeof(void*),
               node_space_assumption_incorrect);
1415};
1416
1417////
1418// btree_node methods
1419template <typename P>
1420inline void btree_node<P>::insert_value(int i, const value_type &x) {
1421 assert(i <= count());
1422 value_init(count(), x);
1423 for (int j = count(); j > i; --j) {
1424 value_swap(j, this, j - 1);
1425 }
1426 set_count(count() + 1);
1427
1428 if (!leaf()) {
1429 ++i;
1430 for (int j = count(); j > i; --j) {
1431 *mutable_child(j) = child(j - 1);
1432 child(j)->set_position(j);
1433 }
1434 *mutable_child(i) = NULL;
1435 }
1436}
1437
1438template <typename P>
1439inline void btree_node<P>::remove_value(int i) {
1440 if (!leaf()) {
1441 assert(child(i + 1)->count() == 0);
1442 for (int j = i + 1; j < count(); ++j) {
1443 *mutable_child(j) = child(j + 1);
1444 child(j)->set_position(j);
1445 }
1446 *mutable_child(count()) = NULL;
1447 }
1448
1449 set_count(count() - 1);
1450 for (; i < count(); ++i) {
1451 value_swap(i, this, i + 1);
1452 }
1453 value_destroy(i);
1454}
1455
// Moves to_move values from the right sibling src into this node (its left
// sibling), rotating them through the delimiting value stored in the shared
// parent. On internal nodes the matching child pointers are moved too.
//
// Preconditions (asserted): both nodes share a parent, src is the sibling
// immediately to the right, src holds at least as many values as this node,
// and 1 <= to_move <= src->count().
template <typename P>
void btree_node<P>::rebalance_right_to_left(btree_node *src, int to_move) {
  assert(parent() == src->parent());
  assert(position() + 1 == src->position());
  assert(src->count() >= count());
  assert(to_move >= 1);
  assert(to_move <= src->count());

  // Make room in the left node for the new values.
  for (int i = 0; i < to_move; ++i) {
    value_init(i + count());
  }

  // Move the delimiting value to the left node and the new delimiting value
  // from the right node.
  value_swap(count(), parent(), position());
  parent()->value_swap(position(), src, to_move - 1);

  // Move the values from the right to the left node.
  for (int i = 1; i < to_move; ++i) {
    value_swap(count() + i, src, i - 1);
  }
  // Shift the values in the right node to their correct position.
  for (int i = to_move; i < src->count(); ++i) {
    src->value_swap(i - to_move, src, i);
  }
  // Destroy the to_move trailing slots of src that were vacated by the shift.
  for (int i = 1; i <= to_move; ++i) {
    src->value_destroy(src->count() - i);
  }

  if (!leaf()) {
    // Move the child pointers from the right to the left node.
    for (int i = 0; i < to_move; ++i) {
      set_child(1 + count() + i, src->child(i));
    }
    // Slide the remaining children of src down and NULL the old slots.
    for (int i = 0; i <= src->count() - to_move; ++i) {
      assert(i + to_move <= src->max_count());
      src->set_child(i, src->child(i + to_move));
      *src->mutable_child(i + to_move) = NULL;
    }
  }

  // Fixup the counts on the src and dest nodes.
  set_count(count() + to_move);
  src->set_count(src->count() - to_move);
}
1502
// Moves to_move values from this node into its right sibling dest, rotating
// them through the delimiting value stored in the shared parent. On
// internal nodes the matching child pointers are moved too.
//
// Preconditions (asserted): both nodes share a parent, dest is the sibling
// immediately to the right, this node holds at least as many values as
// dest, and 1 <= to_move <= count().
template <typename P>
void btree_node<P>::rebalance_left_to_right(btree_node *dest, int to_move) {
  assert(parent() == dest->parent());
  assert(position() + 1 == dest->position());
  assert(count() >= dest->count());
  assert(to_move >= 1);
  assert(to_move <= count());

  // Make room in the right node for the new values.
  for (int i = 0; i < to_move; ++i) {
    dest->value_init(i + dest->count());
  }
  // Shift dest's existing values right by to_move, back to front.
  for (int i = dest->count() - 1; i >= 0; --i) {
    dest->value_swap(i, dest, i + to_move);
  }

  // Move the delimiting value to the right node and the new delimiting value
  // from the left node.
  dest->value_swap(to_move - 1, parent(), position());
  parent()->value_swap(position(), this, count() - to_move);
  value_destroy(count() - to_move);

  // Move the values from the left to the right node.
  for (int i = 1; i < to_move; ++i) {
    value_swap(count() - to_move + i, dest, i - 1);
    value_destroy(count() - to_move + i);
  }

  if (!leaf()) {
    // Move the child pointers from the left to the right node.
    for (int i = dest->count(); i >= 0; --i) {
      dest->set_child(i + to_move, dest->child(i));
      *dest->mutable_child(i) = NULL;
    }
    for (int i = 1; i <= to_move; ++i) {
      dest->set_child(i - 1, child(count() - to_move + i));
      *mutable_child(count() - to_move + i) = NULL;
    }
  }

  // Fixup the counts on the src and dest nodes.
  set_count(count() - to_move);
  dest->set_count(dest->count() + to_move);
}
1547
// Splits this node, moving its upper values into the empty node dest and
// pushing the largest value remaining on this node up into the parent as
// the new delimiter. dest becomes the parent's child immediately to the
// right of this node. insert_position is only used to bias how many values
// move to dest.
template <typename P>
void btree_node<P>::split(btree_node *dest, int insert_position) {
  assert(dest->count() == 0);

  // We bias the split based on the position being inserted. If we're
  // inserting at the beginning of the left node then bias the split to put
  // more values on the right node. If we're inserting at the end of the
  // right node then bias the split to put more values on the left node.
  if (insert_position == 0) {
    dest->set_count(count() - 1);
  } else if (insert_position == max_count()) {
    dest->set_count(0);
  } else {
    dest->set_count(count() / 2);
  }
  set_count(count() - dest->count());
  assert(count() >= 1);

  // Move values from the left sibling to the right sibling.
  for (int i = 0; i < dest->count(); ++i) {
    dest->value_init(i);
    value_swap(count() + i, dest, i);
    value_destroy(count() + i);
  }

  // The split key is the largest value in the left sibling.
  set_count(count() - 1);
  // Insert a default-constructed placeholder on the parent, then swap the
  // split key into it.
  parent()->insert_value(position(), value_type());
  value_swap(count(), parent(), position());
  value_destroy(count());
  parent()->set_child(position() + 1, dest);

  if (!leaf()) {
    // Hand the children that follow the split key over to dest.
    for (int i = 0; i <= dest->count(); ++i) {
      assert(child(count() + i + 1) != NULL);
      dest->set_child(i, child(count() + i + 1));
      *mutable_child(count() + i + 1) = NULL;
    }
  }
}
1588
1589template <typename P>
1590void btree_node<P>::merge(btree_node *src) {
1591 assert(parent() == src->parent());
1592 assert(position() + 1 == src->position());
1593
1594 // Move the delimiting value to the left node.
1595 value_init(count());
1596 value_swap(count(), parent(), position());
1597
1598 // Move the values from the right to the left node.
1599 for (int i = 0; i < src->count(); ++i) {
1600 value_init(1 + count() + i);
1601 value_swap(1 + count() + i, src, i);
1602 src->value_destroy(i);
1603 }
1604
1605 if (!leaf()) {
1606 // Move the child pointers from the right to the left node.
1607 for (int i = 0; i <= src->count(); ++i) {
1608 set_child(1 + count() + i, src->child(i));
1609 *src->mutable_child(i) = NULL;
1610 }
1611 }
1612
1613 // Fixup the counts on the src and dest nodes.
1614 set_count(1 + count() + src->count());
1615 src->set_count(0);
1616
1617 // Remove the value on the parent node.
1618 parent()->remove_value(position());
1619}
1620
// Exchanges the contents (values, child pointers and count) of this node
// and x. Both nodes must be of the same kind, leaf or internal (asserted).
template <typename P>
void btree_node<P>::swap(btree_node *x) {
  assert(leaf() == x->leaf());

  // Swap the values.
  // First initialize slots on the shorter node so that both nodes have
  // max(count(), x->count()) live slots to swap.
  for (int i = count(); i < x->count(); ++i) {
    value_init(i);
  }
  for (int i = x->count(); i < count(); ++i) {
    x->value_init(i);
  }
  int n = std::max(count(), x->count());
  for (int i = 0; i < n; ++i) {
    value_swap(i, x, i);
  }
  // Destroy the surplus slots left behind on the node that was longer. The
  // counts have not been swapped yet, so count()/x->count() are still the
  // pre-swap values.
  for (int i = count(); i < x->count(); ++i) {
    x->value_destroy(i);
  }
  for (int i = x->count(); i < count(); ++i) {
    value_destroy(i);
  }

  if (!leaf()) {
    // Swap the child pointers.
    for (int i = 0; i <= n; ++i) {
      btree_swap_helper(*mutable_child(i), *x->mutable_child(i));
    }
    // Re-parent the exchanged children. The loop bounds still use the
    // pre-swap counts: x now holds this node's former children and vice
    // versa.
    for (int i = 0; i <= count(); ++i) {
      x->child(i)->fields_.parent = x;
    }
    for (int i = 0; i <= x->count(); ++i) {
      child(i)->fields_.parent = this;
    }
  }

  // Swap the counts.
  btree_swap_helper(fields_.count, x->fields_.count);
}
1659
1660////
1661// btree_iterator methods
1662template <typename N, typename R, typename P>
1663void btree_iterator<N, R, P>::increment_slow() {
1664 if (node->leaf()) {
1665 assert(position >= node->count());
1666 self_type save(*this);
1667 while (position == node->count() && !node->is_root()) {
1668 assert(node->parent()->child(node->position()) == node);
1669 position = node->position();
1670 node = node->parent();
1671 }
1672 if (position == node->count()) {
1673 *this = save;
1674 }
1675 } else {
1676 assert(position < node->count());
1677 node = node->child(position + 1);
1678 while (!node->leaf()) {
1679 node = node->child(0);
1680 }
1681 position = 0;
1682 }
1683}
1684
1685template <typename N, typename R, typename P>
1686void btree_iterator<N, R, P>::increment_by(int count) {
1687 while (count > 0) {
1688 if (node->leaf()) {
1689 int rest = node->count() - position;
1690 position += std::min(rest, count);
1691 count = count - rest;
1692 if (position < node->count()) {
1693 return;
1694 }
1695 } else {
1696 --count;
1697 }
1698 increment_slow();
1699 }
1700}
1701
1702template <typename N, typename R, typename P>
1703void btree_iterator<N, R, P>::decrement_slow() {
1704 if (node->leaf()) {
1705 assert(position <= -1);
1706 self_type save(*this);
1707 while (position < 0 && !node->is_root()) {
1708 assert(node->parent()->child(node->position()) == node);
1709 position = node->position() - 1;
1710 node = node->parent();
1711 }
1712 if (position < 0) {
1713 *this = save;
1714 }
1715 } else {
1716 assert(position >= 0);
1717 node = node->child(position);
1718 while (!node->leaf()) {
1719 node = node->child(node->count());
1720 }
1721 position = node->count() - 1;
1722 }
1723}
1724
1725////
1726// btree methods
// Constructs an empty btree that uses comp for key ordering and alloc for
// node storage. The root pointer starts out NULL.
template <typename P>
btree<P>::btree(const key_compare &comp, const allocator_type &alloc)
    : key_compare(comp),
      root_(alloc, NULL) {
}
1732
// Copy constructor: starts as an empty tree with x's comparator and
// allocator, then copies x's values via assign().
template <typename P>
btree<P>::btree(const self_type &x)
    : key_compare(x.key_comp()),
      root_(x.internal_allocator(), NULL) {
  assign(x);
}
1739
1740template <typename P> template <typename ValuePointer>
1741std::pair<typename btree<P>::iterator, bool>
1742btree<P>::insert_unique(const key_type &key, ValuePointer value) {
1743 if (empty()) {
1744 *mutable_root() = new_leaf_root_node(1);
1745 }
1746
1747 std::pair<iterator, int> res = internal_locate(key, iterator(root(), 0));
1748 iterator &iter = res.first;
1749 if (res.second == kExactMatch) {
1750 // The key already exists in the tree, do nothing.
1751 return std::make_pair(internal_last(iter), false);
1752 } else if (!res.second) {
1753 iterator last = internal_last(iter);
1754 if (last.node && !compare_keys(key, last.key())) {
1755 // The key already exists in the tree, do nothing.
1756 return std::make_pair(last, false);
1757 }
1758 }
1759
1760 return std::make_pair(internal_insert(iter, *value), true);
1761}
1762
1763template <typename P>
1764inline typename btree<P>::iterator
1765btree<P>::insert_unique(iterator position, const value_type &v) {
1766 if (!empty()) {
1767 const key_type &key = params_type::key(v);
1768 if (position == end() || compare_keys(key, position.key())) {
1769 iterator prev = position;
1770 if (position == begin() || compare_keys((--prev).key(), key)) {
1771 // prev.key() < key < position.key()
1772 return internal_insert(position, v);
1773 }
1774 } else if (compare_keys(position.key(), key)) {
1775 iterator next = position;
1776 ++next;
1777 if (next == end() || compare_keys(key, next.key())) {
1778 // position.key() < key < next.key()
1779 return internal_insert(next, v);
1780 }
1781 } else {
1782 // position.key() == key
1783 return position;
1784 }
1785 }
1786 return insert_unique(v).first;
1787}
1788
1789template <typename P> template <typename InputIterator>
1790void btree<P>::insert_unique(InputIterator b, InputIterator e) {
1791 for (; b != e; ++b) {
1792 insert_unique(end(), *b);
1793 }
1794}
1795
1796template <typename P> template <typename ValuePointer>
1797typename btree<P>::iterator
1798btree<P>::insert_multi(const key_type &key, ValuePointer value) {
1799 if (empty()) {
1800 *mutable_root() = new_leaf_root_node(1);
1801 }
1802
1803 iterator iter = internal_upper_bound(key, iterator(root(), 0));
1804 if (!iter.node) {
1805 iter = end();
1806 }
1807 return internal_insert(iter, *value);
1808}
1809
1810template <typename P>
1811typename btree<P>::iterator
1812btree<P>::insert_multi(iterator position, const value_type &v) {
1813 if (!empty()) {
1814 const key_type &key = params_type::key(v);
1815 if (position == end() || !compare_keys(position.key(), key)) {
1816 iterator prev = position;
1817 if (position == begin() || !compare_keys(key, (--prev).key())) {
1818 // prev.key() <= key <= position.key()
1819 return internal_insert(position, v);
1820 }
1821 } else {
1822 iterator next = position;
1823 ++next;
1824 if (next == end() || !compare_keys(next.key(), key)) {
1825 // position.key() < key <= next.key()
1826 return internal_insert(next, v);
1827 }
1828 }
1829 }
1830 return insert_multi(v);
1831}
1832
1833template <typename P> template <typename InputIterator>
1834void btree<P>::insert_multi(InputIterator b, InputIterator e) {
1835 for (; b != e; ++b) {
1836 insert_multi(end(), *b);
1837 }
1838}
1839
1840template <typename P>
1841void btree<P>::assign(const self_type &x) {
1842 clear();
1843
1844 *mutable_key_comp() = x.key_comp();
1845 *mutable_internal_allocator() = x.internal_allocator();
1846
1847 // Assignment can avoid key comparisons because we know the order of the
1848 // values is the same order we'll store them in.
1849 for (const_iterator iter = x.begin(); iter != x.end(); ++iter) {
1850 if (empty()) {
1851 insert_multi(*iter);
1852 } else {
1853 // If the btree is not empty, we can just insert the new value at the end
1854 // of the tree!
1855 internal_insert(end(), *iter);
1856 }
1857 }
1858}
1859
// Erases the value iter points at and returns an iterator to the value that
// followed it, or end() when the tree becomes empty. After removing the
// value from a leaf, under-full nodes are merged or rebalanced on the way
// back up to the root.
template <typename P>
typename btree<P>::iterator btree<P>::erase(iterator iter) {
  bool internal_delete = false;
  if (!iter.node->leaf()) {
    // Deletion of a value on an internal node. Swap the key with the largest
    // value of our left child. This is easy, we just decrement iter.
    iterator tmp_iter(iter--);
    assert(iter.node->leaf());
    assert(!compare_keys(tmp_iter.key(), iter.key()));
    iter.node->value_swap(iter.position, tmp_iter.node, tmp_iter.position);
    internal_delete = true;
    --*mutable_size();
  } else if (!root()->leaf()) {
    // The explicit size is only maintained when the root is an internal
    // node; a leaf root's size is its count() (see size()).
    --*mutable_size();
  }

  // Delete the key from the leaf.
  iter.node->remove_value(iter.position);

  // We want to return the next value after the one we just erased. If we
  // erased from an internal node (internal_delete == true), then the next
  // value is ++(++iter). If we erased from a leaf node (internal_delete ==
  // false) then the next value is ++iter. Note that ++iter may point to an
  // internal node and the value in the internal node may move to a leaf node
  // (iter.node) when rebalancing is performed at the leaf level.

  // Merge/rebalance as we walk back up the tree.
  iterator res(iter);
  for (;;) {
    if (iter.node == root()) {
      try_shrink();
      if (empty()) {
        return end();
      }
      break;
    }
    if (iter.node->count() >= kMinNodeValues) {
      break;
    }
    bool merged = try_merge_or_rebalance(&iter);
    // Only leaf-level adjustments affect where the result should point.
    if (iter.node->leaf()) {
      res = iter;
    }
    if (!merged) {
      break;
    }
    iter.node = iter.node->parent();
  }

  // Adjust our return value. If we're pointing at the end of a node, advance
  // the iterator.
  if (res.position == res.node->count()) {
    res.position = res.node->count() - 1;
    ++res;
  }
  // If we erased from an internal node, advance the iterator.
  if (internal_delete) {
    ++res;
  }
  return res;
}
1921
1922template <typename P>
1923int btree<P>::erase(iterator b, iterator e) {
1924 int count = distance(b, e);
1925 for (int i = 0; i < count; i++) {
1926 b = erase(b);
1927 }
1928 return count;
1929}
1930
1931template <typename P>
1932int btree<P>::erase_unique(const key_type &key) {
1933 iterator iter = internal_find_unique(key, iterator(root(), 0));
1934 if (!iter.node) {
1935 // The key doesn't exist in the tree, return nothing done.
1936 return 0;
1937 }
1938 erase(iter);
1939 return 1;
1940}
1941
1942template <typename P>
1943int btree<P>::erase_multi(const key_type &key) {
1944 iterator b = internal_lower_bound(key, iterator(root(), 0));
1945 if (!b.node) {
1946 // The key doesn't exist in the tree, return nothing done.
1947 return 0;
1948 }
1949 // Delete all of the keys between begin and upper_bound(key).
1950 iterator e = internal_end(
1951 internal_upper_bound(key, iterator(root(), 0)));
1952 return erase(b, e);
1953}
1954
1955template <typename P>
1956void btree<P>::clear() {
1957 if (root() != NULL) {
1958 internal_clear(root());
1959 }
1960 *mutable_root() = NULL;
1961}
1962
// Exchanges the contents of *this and x: first the comparator (the
// key_compare view of each tree), then the root_ handle.
template <typename P>
void btree<P>::swap(self_type &x) {
  std::swap(static_cast<key_compare&>(*this), static_cast<key_compare&>(x));
  std::swap(root_, x.root_);
}
1968
// Checks the tree's invariants with assert(). All checks are inside
// assert(), so nothing is evaluated when asserts are compiled out.
//
// Non-empty tree: size() matches the count returned by internal_verify(),
// leftmost()/rightmost() agree with the nodes reached by stepping an
// iterator in from just before the first / just past the last root
// position, and both are leaves.
// Empty tree: size is 0 and leftmost()/rightmost() are NULL.
template <typename P>
void btree<P>::verify() const {
  if (root() != NULL) {
    assert(size() == internal_verify(root(), NULL, NULL));
    assert(leftmost() == (++const_iterator(root(), -1)).node);
    assert(rightmost() == (--const_iterator(root(), root()->count())).node);
    assert(leftmost()->leaf());
    assert(rightmost()->leaf());
  } else {
    assert(size() == 0);
    assert(leftmost() == NULL);
    assert(rightmost() == NULL);
  }
}
1983
// Makes room for an insertion at *iter, whose node must be full (asserted).
//
// For a non-root node, first try to shift values into the left sibling and
// then into the right sibling. If neither works, make sure the parent has a
// free slot (recursing on the parent if it is full) and split the node. For
// the root, a new level is added above it before splitting.
//
// iter->node and iter->position are references into *iter and are updated
// so that on return they name the node and position where the pending
// insertion should take place.
template <typename P>
void btree<P>::rebalance_or_split(iterator *iter) {
  node_type *&node = iter->node;
  int &insert_position = iter->position;
  assert(node->count() == node->max_count());

  // First try to make room on the node by rebalancing.
  node_type *parent = node->parent();
  if (node != root()) {
    if (node->position() > 0) {
      // Try rebalancing with our left sibling.
      node_type *left = parent->child(node->position() - 1);
      if (left->count() < left->max_count()) {
        // We bias rebalancing based on the position being inserted. If we're
        // inserting at the end of the right node then we bias rebalancing to
        // fill up the left node.
        int to_move = (left->max_count() - left->count()) /
            (1 + (insert_position < left->max_count()));
        to_move = std::max(1, to_move);

        if (((insert_position - to_move) >= 0) ||
            ((left->count() + to_move) < left->max_count())) {
          left->rebalance_right_to_left(node, to_move);

          assert(node->max_count() - node->count() == to_move);
          insert_position = insert_position - to_move;
          if (insert_position < 0) {
            // The insertion point moved into the left sibling.
            insert_position = insert_position + left->count() + 1;
            node = left;
          }

          assert(node->count() < node->max_count());
          return;
        }
      }
    }

    if (node->position() < parent->count()) {
      // Try rebalancing with our right sibling.
      node_type *right = parent->child(node->position() + 1);
      if (right->count() < right->max_count()) {
        // We bias rebalancing based on the position being inserted. If we're
        // inserting at the beginning of the left node then we bias rebalancing
        // to fill up the right node.
        int to_move = (right->max_count() - right->count()) /
            (1 + (insert_position > 0));
        to_move = std::max(1, to_move);

        if ((insert_position <= (node->count() - to_move)) ||
            ((right->count() + to_move) < right->max_count())) {
          node->rebalance_left_to_right(right, to_move);

          if (insert_position > node->count()) {
            // The insertion point moved into the right sibling.
            insert_position = insert_position - node->count() - 1;
            node = right;
          }

          assert(node->count() < node->max_count());
          return;
        }
      }
    }

    // Rebalancing failed, make sure there is room on the parent node for a new
    // value.
    if (parent->count() == parent->max_count()) {
      iterator parent_iter(node->parent(), node->position());
      rebalance_or_split(&parent_iter);
    }
  } else {
    // Rebalancing not possible because this is the root node.
    if (root()->leaf()) {
      // The root node is currently a leaf node: create a new root node and set
      // the current root node as the child of the new root.
      parent = new_internal_root_node();
      parent->set_child(0, root());
      *mutable_root() = parent;
      assert(*mutable_rightmost() == parent->child(0));
    } else {
      // The root node is an internal node. We do not want to create a new root
      // node because the root node is special and holds the size of the tree
      // and a pointer to the rightmost node. So we create a new internal node
      // and move all of the items on the current root into the new node.
      parent = new_internal_node(parent);
      parent->set_child(0, parent);
      parent->swap(root());
      node = parent;
    }
  }

  // Split the node.
  node_type *split_node;
  if (node->leaf()) {
    split_node = new_leaf_node(parent);
    node->split(split_node, insert_position);
    // Keep the cached rightmost leaf up to date.
    if (rightmost() == node) {
      *mutable_rightmost() = split_node;
    }
  } else {
    split_node = new_internal_node(parent);
    node->split(split_node, insert_position);
  }

  // If the insertion point ended up in the new right-hand node, retarget it.
  if (insert_position > node->count()) {
    insert_position = insert_position - node->count() - 1;
    node = split_node;
  }
}
2092
2093template <typename P>
2094void btree<P>::merge_nodes(node_type *left, node_type *right) {
2095 left->merge(right);
2096 if (right->leaf()) {
2097 if (rightmost() == right) {
2098 *mutable_rightmost() = left;
2099 }
2100 delete_leaf_node(right);
2101 } else {
2102 delete_internal_node(right);
2103 }
2104}
2105
2106template <typename P>
2107bool btree<P>::try_merge_or_rebalance(iterator *iter) {
2108 node_type *parent = iter->node->parent();
2109 if (iter->node->position() > 0) {
2110 // Try merging with our left sibling.
2111 node_type *left = parent->child(iter->node->position() - 1);
2112 if ((1 + left->count() + iter->node->count()) <= left->max_count()) {
2113 iter->position += 1 + left->count();
2114 merge_nodes(left, iter->node);
2115 iter->node = left;
2116 return true;
2117 }
2118 }
2119 if (iter->node->position() < parent->count()) {
2120 // Try merging with our right sibling.
2121 node_type *right = parent->child(iter->node->position() + 1);
2122 if ((1 + iter->node->count() + right->count()) <= right->max_count()) {
2123 merge_nodes(iter->node, right);
2124 return true;
2125 }
2126 // Try rebalancing with our right sibling. We don't perform rebalancing if
2127 // we deleted the first element from iter->node and the node is not
2128 // empty. This is a small optimization for the common pattern of deleting
2129 // from the front of the tree.
2130 if ((right->count() > kMinNodeValues) &&
2131 ((iter->node->count() == 0) ||
2132 (iter->position > 0))) {
2133 int to_move = (right->count() - iter->node->count()) / 2;
2134 to_move = std::min(to_move, right->count() - 1);
2135 iter->node->rebalance_right_to_left(right, to_move);
2136 return false;
2137 }
2138 }
2139 if (iter->node->position() > 0) {
2140 // Try rebalancing with our left sibling. We don't perform rebalancing if
2141 // we deleted the last element from iter->node and the node is not
2142 // empty. This is a small optimization for the common pattern of deleting
2143 // from the back of the tree.
2144 node_type *left = parent->child(iter->node->position() - 1);
2145 if ((left->count() > kMinNodeValues) &&
2146 ((iter->node->count() == 0) ||
2147 (iter->position < iter->node->count()))) {
2148 int to_move = (left->count() - iter->node->count()) / 2;
2149 to_move = std::min(to_move, left->count() - 1);
2150 left->rebalance_left_to_right(iter->node, to_move);
2151 iter->position += to_move;
2152 return false;
2153 }
2154 }
2155 return false;
2156}
2157
2158template <typename P>
2159void btree<P>::try_shrink() {
2160 if (root()->count() > 0) {
2161 return;
2162 }
2163 // Deleted the last item on the root node, shrink the height of the tree.
2164 if (root()->leaf()) {
2165 assert(size() == 0);
2166 delete_leaf_node(root());
2167 *mutable_root() = NULL;
2168 } else {
2169 node_type *child = root()->child(0);
2170 if (child->leaf()) {
2171 // The child is a leaf node so simply make it the root node in the tree.
2172 child->make_root();
2173 delete_internal_root_node();
2174 *mutable_root() = child;
2175 } else {
2176 // The child is an internal node. We want to keep the existing root node
2177 // so we move all of the values from the child node into the existing
2178 // (empty) root node.
2179 child->swap(root());
2180 delete_internal_node(child);
2181 }
2182 }
2183}
2184
2185template <typename P> template <typename IterType>
2186inline IterType btree<P>::internal_last(IterType iter) {
2187 while (iter.node && iter.position == iter.node->count()) {
2188 iter.position = iter.node->position();
2189 iter.node = iter.node->parent();
2190 if (iter.node->leaf()) {
2191 iter.node = NULL;
2192 }
2193 }
2194 return iter;
2195}
2196
2197template <typename P>
2198inline typename btree<P>::iterator
2199btree<P>::internal_insert(iterator iter, const value_type &v) {
2200 if (!iter.node->leaf()) {
2201 // We can't insert on an internal node. Instead, we'll insert after the
2202 // previous value which is guaranteed to be on a leaf node.
2203 --iter;
2204 ++iter.position;
2205 }
2206 if (iter.node->count() == iter.node->max_count()) {
2207 // Make room in the leaf for the new item.
2208 if (iter.node->max_count() < kNodeValues) {
2209 // Insertion into the root where the root is smaller that the full node
2210 // size. Simply grow the size of the root node.
2211 assert(iter.node == root());
2212 iter.node = new_leaf_root_node(
2213 std::min<int>(kNodeValues, 2 * iter.node->max_count()));
2214 iter.node->swap(root());
2215 delete_leaf_node(root());
2216 *mutable_root() = iter.node;
2217 } else {
2218 rebalance_or_split(&iter);
2219 ++*mutable_size();
2220 }
2221 } else if (!root()->leaf()) {
2222 ++*mutable_size();
2223 }
2224 iter.node->insert_value(iter.position, v);
2225 return iter;
2226}
2227
2228template <typename P> template <typename IterType>
2229inline std::pair<IterType, int> btree<P>::internal_locate(
2230 const key_type &key, IterType iter) const {
2231 return internal_locate_type::dispatch(key, *this, iter);
2232}
2233
2234template <typename P> template <typename IterType>
2235inline std::pair<IterType, int> btree<P>::internal_locate_plain_compare(
2236 const key_type &key, IterType iter) const {
2237 for (;;) {
2238 iter.position = iter.node->lower_bound(key, key_comp());
2239 if (iter.node->leaf()) {
2240 break;
2241 }
2242 iter.node = iter.node->child(iter.position);
2243 }
2244 return std::make_pair(iter, 0);
2245}
2246
2247template <typename P> template <typename IterType>
2248inline std::pair<IterType, int> btree<P>::internal_locate_compare_to(
2249 const key_type &key, IterType iter) const {
2250 for (;;) {
2251 int res = iter.node->lower_bound(key, key_comp());
2252 iter.position = res & kMatchMask;
2253 if (res & kExactMatch) {
2254 return std::make_pair(iter, static_cast<int>(kExactMatch));
2255 }
2256 if (iter.node->leaf()) {
2257 break;
2258 }
2259 iter.node = iter.node->child(iter.position);
2260 }
2261 return std::make_pair(iter, -kExactMatch);
2262}
2263
2264template <typename P> template <typename IterType>
2265IterType btree<P>::internal_lower_bound(
2266 const key_type &key, IterType iter) const {
2267 if (iter.node) {
2268 for (;;) {
2269 iter.position =
2270 iter.node->lower_bound(key, key_comp()) & kMatchMask;
2271 if (iter.node->leaf()) {
2272 break;
2273 }
2274 iter.node = iter.node->child(iter.position);
2275 }
2276 iter = internal_last(iter);
2277 }
2278 return iter;
2279}
2280
2281template <typename P> template <typename IterType>
2282IterType btree<P>::internal_upper_bound(
2283 const key_type &key, IterType iter) const {
2284 if (iter.node) {
2285 for (;;) {
2286 iter.position = iter.node->upper_bound(key, key_comp());
2287 if (iter.node->leaf()) {
2288 break;
2289 }
2290 iter.node = iter.node->child(iter.position);
2291 }
2292 iter = internal_last(iter);
2293 }
2294 return iter;
2295}
2296
2297template <typename P> template <typename IterType>
2298IterType btree<P>::internal_find_unique(
2299 const key_type &key, IterType iter) const {
2300 if (iter.node) {
2301 std::pair<IterType, int> res = internal_locate(key, iter);
2302 if (res.second == kExactMatch) {
2303 return res.first;
2304 }
2305 if (!res.second) {
2306 iter = internal_last(res.first);
2307 if (iter.node && !compare_keys(key, iter.key())) {
2308 return iter;
2309 }
2310 }
2311 }
2312 return IterType(NULL, 0);
2313}
2314
2315template <typename P> template <typename IterType>
2316IterType btree<P>::internal_find_multi(
2317 const key_type &key, IterType iter) const {
2318 if (iter.node) {
2319 iter = internal_lower_bound(key, iter);
2320 if (iter.node) {
2321 iter = internal_last(iter);
2322 if (iter.node && !compare_keys(key, iter.key())) {
2323 return iter;
2324 }
2325 }
2326 }
2327 return IterType(NULL, 0);
2328}
2329
2330template <typename P>
2331void btree<P>::internal_clear(node_type *node) {
2332 if (!node->leaf()) {
2333 for (int i = 0; i <= node->count(); ++i) {
2334 internal_clear(node->child(i));
2335 }
2336 if (node == root()) {
2337 delete_internal_root_node();
2338 } else {
2339 delete_internal_node(node);
2340 }
2341 } else {
2342 delete_leaf_node(node);
2343 }
2344}
2345
2346template <typename P>
2347void btree<P>::internal_dump(
2348 std::ostream &os, const node_type *node, int level) const {
2349 for (int i = 0; i < node->count(); ++i) {
2350 if (!node->leaf()) {
2351 internal_dump(os, node->child(i), level + 1);
2352 }
2353 for (int j = 0; j < level; ++j) {
2354 os << " ";
2355 }
2356 os << node->key(i) << " [" << level << "]\n";
2357 }
2358 if (!node->leaf()) {
2359 internal_dump(os, node->child(node->count()), level + 1);
2360 }
2361}
2362
2363template <typename P>
2364int btree<P>::internal_verify(
2365 const node_type *node, const key_type *lo, const key_type *hi) const {
2366 assert(node->count() > 0);
2367 assert(node->count() <= node->max_count());
2368 if (lo) {
2369 assert(!compare_keys(node->key(0), *lo));
2370 }
2371 if (hi) {
2372 assert(!compare_keys(*hi, node->key(node->count() - 1)));
2373 }
2374 for (int i = 1; i < node->count(); ++i) {
2375 assert(!compare_keys(node->key(i), node->key(i - 1)));
2376 }
2377 int count = node->count();
2378 if (!node->leaf()) {
2379 for (int i = 0; i <= node->count(); ++i) {
2380 assert(node->child(i) != NULL);
2381 assert(node->child(i)->parent() == node);
2382 assert(node->child(i)->position() == i);
2383 count += internal_verify(
2384 node->child(i),
2385 (i == 0) ? lo : &node->key(i - 1),
2386 (i == node->count()) ? hi : &node->key(i));
2387 }
2388 }
2389 return count;
2390}
2391
2392} // namespace btree
2393
2394#endif // UTIL_BTREE_BTREE_H__
diff --git a/xdelta3/cpp-btree/btree_bench.cc b/xdelta3/cpp-btree/btree_bench.cc
new file mode 100644
index 0000000..6eaed99
--- /dev/null
+++ b/xdelta3/cpp-btree/btree_bench.cc
@@ -0,0 +1,593 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include <stdint.h>
16#include <stdlib.h>
17#include <algorithm>
18#include <functional>
19#include <map>
20#include <set>
21#include <string>
22#include <sys/time.h>
23#include <type_traits>
24#include <vector>
25
26#include "gflags/gflags.h"
27#include "btree_map.h"
28#include "btree_set.h"
29#include "btree_test.h"
30
31DEFINE_int32(test_random_seed, 123456789, "Seed for srand()");
32DEFINE_int32(benchmark_max_iters, 10000000, "Maximum test iterations");
33DEFINE_int32(benchmark_min_iters, 100, "Minimum test iterations");
34DEFINE_int32(benchmark_target_seconds, 1,
35 "Attempt to benchmark for this many seconds");
36
37using std::allocator;
38using std::less;
39using std::map;
40using std::max;
41using std::min;
42using std::multimap;
43using std::multiset;
44using std::set;
45using std::string;
46using std::vector;
47
48namespace btree {
49namespace {
50
// Random-number functor backed by the C library generator. Constructing it
// reseeds the global rand() state; calling it with `l` yields rand() % l.
// The caller must pass a non-zero `l`.
struct RandGen {
  typedef ptrdiff_t result_type;

  RandGen(result_type seed) {
    srand(seed);
  }

  result_type operator()(result_type l) {
    const int draw = rand();
    return draw % l;
  }
};
60
// One registered benchmark: its name, the function to run, and the timing
// state accumulated while it runs. Instances are chained through
// `next_benchmark`.
struct BenchmarkRun {
  BenchmarkRun(const char *name, void (*func)(int));

  // Timing control.
  void Start();
  void Stop();
  void Reset();

  // Runs this benchmark.
  void Run();

  BenchmarkRun *next_benchmark;   // Next entry in the registration chain.
  const char *benchmark_name;     // Name passed to the constructor.
  void (*benchmark_func)(int);    // Benchmark body; takes an int argument.
  int64_t accum_micros;           // Timed microseconds accumulated so far.
  int64_t last_started;           // Start timestamp; 0 when timing is off.
};
74
75BenchmarkRun *first_benchmark;
76BenchmarkRun *current_benchmark;
77
// Returns the current time reported by gettimeofday() as a single count of
// microseconds.
//
// The seconds field is widened to 64 bits before scaling: multiplying in the
// native width of tv_sec overflows on platforms where that type is 32 bits.
int64_t get_micros() {
  timeval tv;
  gettimeofday(&tv, NULL);
  return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
}
83
84BenchmarkRun::BenchmarkRun(const char *name, void (*func)(int))
85 : next_benchmark(first_benchmark),
86 benchmark_name(name),
87 benchmark_func(func),
88 accum_micros(0),
89 last_started(0) {
90 first_benchmark = this;
91}
92
// BTREE_BENCHMARK(fn) defines a BenchmarkRun object named bench<N> that
// registers `fn` under its own spelling. The two helper layers exist so that
// __COUNTER__ is expanded to a number before being pasted into the name.
#define BTREE_BENCHMARK3(label, fn, id) \
  BenchmarkRun bench ## id (label, fn)
#define BTREE_BENCHMARK2(label, fn, id) \
  BTREE_BENCHMARK3(label, fn, id)
#define BTREE_BENCHMARK(fn) \
  BTREE_BENCHMARK2(#fn, fn, __COUNTER__)
99
100void StopBenchmarkTiming() {
101 current_benchmark->Stop();
102}
103
104void StartBenchmarkTiming() {
105 current_benchmark->Start();
106}
107
108void RunBenchmarks() {
109 for (BenchmarkRun *bench = first_benchmark; bench;
110 bench = bench->next_benchmark) {
111 bench->Run();
112 }
113}
114
115void BenchmarkRun::Start() {
116 assert(!last_started);
117 last_started = get_micros();
118}
119
120void BenchmarkRun::Stop() {
121 if (last_started == 0) {
122 return;
123 }
124 accum_micros += get_micros() - last_started;
125 last_started = 0;
126}
127
128void BenchmarkRun::Reset() {
129 last_started = 0;
130 accum_micros = 0;
131}
132
133void BenchmarkRun::Run() {
134 assert(current_benchmark == NULL);
135 current_benchmark = this;
136 int iters = FLAGS_benchmark_min_iters;
137 for (;;) {
138 Reset();
139 Start();
140 benchmark_func(iters);
141 Stop();
142 if (accum_micros > FLAGS_benchmark_target_seconds * 1000000 ||
143 iters >= FLAGS_benchmark_max_iters) {
144 break;
145 } else if (accum_micros == 0) {
146 iters *= 100;
147 } else {
148 int64_t target_micros = FLAGS_benchmark_target_seconds * 1000000;
149 iters = target_micros * iters / accum_micros;
150 }
151 iters = min(iters, FLAGS_benchmark_max_iters);
152 }
153 std::cout << benchmark_name << "\t"
154 << accum_micros * 1000 / iters << "\t"
155 << iters;
156 current_benchmark = NULL;
157}
158
// Copies its argument into a volatile local. Used by the benchmarks so that a
// computed result has an observable use and is not optimized away.
template <typename T>
void sink(const T& t0) {
  volatile T keep_alive = t0;
}
164
165// Benchmark insertion of values into a container.
166template <typename T>
167void BM_Insert(int n) {
168 typedef typename std::remove_const<typename T::value_type>::type V;
169 typename KeyOfValue<typename T::key_type, V>::type key_of_value;
170
171 // Disable timing while we perform some initialization.
172 StopBenchmarkTiming();
173
174 T container;
175 vector<V> values = GenerateValues<V>(FLAGS_benchmark_values);
176 for (int i = 0; i < values.size(); i++) {
177 container.insert(values[i]);
178 }
179
180 for (int i = 0; i < n; ) {
181 // Remove and re-insert 10% of the keys
182 int m = min(n - i, FLAGS_benchmark_values / 10);
183
184 for (int j = i; j < i + m; j++) {
185 int x = j % FLAGS_benchmark_values;
186 container.erase(key_of_value(values[x]));
187 }
188
189 StartBenchmarkTiming();
190
191 for (int j = i; j < i + m; j++) {
192 int x = j % FLAGS_benchmark_values;
193 container.insert(values[x]);
194 }
195
196 StopBenchmarkTiming();
197
198 i += m;
199 }
200}
201
202// Benchmark lookup of values in a container.
203template <typename T>
204void BM_Lookup(int n) {
205 typedef typename std::remove_const<typename T::value_type>::type V;
206 typename KeyOfValue<typename T::key_type, V>::type key_of_value;
207
208 // Disable timing while we perform some initialization.
209 StopBenchmarkTiming();
210
211 T container;
212 vector<V> values = GenerateValues<V>(FLAGS_benchmark_values);
213
214 for (int i = 0; i < values.size(); i++) {
215 container.insert(values[i]);
216 }
217
218 V r = V();
219
220 StartBenchmarkTiming();
221
222 for (int i = 0; i < n; i++) {
223 int m = i % values.size();
224 r = *container.find(key_of_value(values[m]));
225 }
226
227 StopBenchmarkTiming();
228
229 sink(r); // Keep compiler from optimizing away r.
230}
231
232// Benchmark lookup of values in a full container, meaning that values
233// are inserted in-order to take advantage of biased insertion, which
234// yields a full tree.
235template <typename T>
236void BM_FullLookup(int n) {
237 typedef typename std::remove_const<typename T::value_type>::type V;
238 typename KeyOfValue<typename T::key_type, V>::type key_of_value;
239
240 // Disable timing while we perform some initialization.
241 StopBenchmarkTiming();
242
243 T container;
244 vector<V> values = GenerateValues<V>(FLAGS_benchmark_values);
245 vector<V> sorted(values);
246 sort(sorted.begin(), sorted.end());
247
248 for (int i = 0; i < sorted.size(); i++) {
249 container.insert(sorted[i]);
250 }
251
252 V r = V();
253
254 StartBenchmarkTiming();
255
256 for (int i = 0; i < n; i++) {
257 int m = i % values.size();
258 r = *container.find(key_of_value(values[m]));
259 }
260
261 StopBenchmarkTiming();
262
263 sink(r); // Keep compiler from optimizing away r.
264}
265
266// Benchmark deletion of values from a container.
267template <typename T>
268void BM_Delete(int n) {
269 typedef typename std::remove_const<typename T::value_type>::type V;
270 typename KeyOfValue<typename T::key_type, V>::type key_of_value;
271
272 // Disable timing while we perform some initialization.
273 StopBenchmarkTiming();
274
275 T container;
276 vector<V> values = GenerateValues<V>(FLAGS_benchmark_values);
277 for (int i = 0; i < values.size(); i++) {
278 container.insert(values[i]);
279 }
280
281 for (int i = 0; i < n; ) {
282 // Remove and re-insert 10% of the keys
283 int m = min(n - i, FLAGS_benchmark_values / 10);
284
285 StartBenchmarkTiming();
286
287 for (int j = i; j < i + m; j++) {
288 int x = j % FLAGS_benchmark_values;
289 container.erase(key_of_value(values[x]));
290 }
291
292 StopBenchmarkTiming();
293
294 for (int j = i; j < i + m; j++) {
295 int x = j % FLAGS_benchmark_values;
296 container.insert(values[x]);
297 }
298
299 i += m;
300 }
301}
302
303// Benchmark steady-state insert (into first half of range) and remove
304// (from second second half of range), treating the container
305// approximately like a queue with log-time access for all elements.
306// This benchmark does not test the case where insertion and removal
307// happen in the same region of the tree. This benchmark counts two
308// value constructors.
309template <typename T>
310void BM_QueueAddRem(int n) {
311 typedef typename std::remove_const<typename T::value_type>::type V;
312 typename KeyOfValue<typename T::key_type, V>::type key_of_value;
313
314 // Disable timing while we perform some initialization.
315 StopBenchmarkTiming();
316 assert(FLAGS_benchmark_values % 2 == 0);
317
318 T container;
319
320 const int half = FLAGS_benchmark_values / 2;
321 vector<int> remove_keys(half);
322 vector<int> add_keys(half);
323
324 for (int i = 0; i < half; i++) {
325 remove_keys[i] = i;
326 add_keys[i] = i;
327 }
328
329 RandGen rand(FLAGS_test_random_seed);
330
331 random_shuffle(remove_keys.begin(), remove_keys.end(), rand);
332 random_shuffle(add_keys.begin(), add_keys.end(), rand);
333
334 Generator<V> g(FLAGS_benchmark_values + FLAGS_benchmark_max_iters);
335
336 for (int i = 0; i < half; i++) {
337 container.insert(g(add_keys[i]));
338 container.insert(g(half + remove_keys[i]));
339 }
340
341 // There are three parts each of size "half":
342 // 1 is being deleted from [offset - half, offset)
343 // 2 is standing [offset, offset + half)
344 // 3 is being inserted into [offset + half, offset + 2 * half)
345 int offset = 0;
346
347 StartBenchmarkTiming();
348
349 for (int i = 0; i < n; i++) {
350 int idx = i % half;
351
352 if (idx == 0) {
353 StopBenchmarkTiming();
354 random_shuffle(remove_keys.begin(), remove_keys.end(), rand);
355 random_shuffle(add_keys.begin(), add_keys.end(), rand);
356 offset += half;
357 StartBenchmarkTiming();
358 }
359
360 int e = container.erase(key_of_value(g(offset - half + remove_keys[idx])));
361 assert(e == 1);
362 container.insert(g(offset + half + add_keys[idx]));
363 }
364
365 StopBenchmarkTiming();
366}
367
368// Mixed insertion and deletion in the same range using pre-constructed values.
369template <typename T>
370void BM_MixedAddRem(int n) {
371 typedef typename std::remove_const<typename T::value_type>::type V;
372 typename KeyOfValue<typename T::key_type, V>::type key_of_value;
373
374 // Disable timing while we perform some initialization.
375 StopBenchmarkTiming();
376 assert(FLAGS_benchmark_values % 2 == 0);
377
378 T container;
379 RandGen rand(FLAGS_test_random_seed);
380
381 vector<V> values = GenerateValues<V>(FLAGS_benchmark_values * 2);
382
383 // Create two random shuffles
384 vector<int> remove_keys(FLAGS_benchmark_values);
385 vector<int> add_keys(FLAGS_benchmark_values);
386
387 // Insert the first half of the values (already in random order)
388 for (int i = 0; i < FLAGS_benchmark_values; i++) {
389 container.insert(values[i]);
390
391 // remove_keys and add_keys will be swapped before each round,
392 // therefore fill add_keys here w/ the keys being inserted, so
393 // they'll be the first to be removed.
394 remove_keys[i] = i + FLAGS_benchmark_values;
395 add_keys[i] = i;
396 }
397
398 StartBenchmarkTiming();
399
400 for (int i = 0; i < n; i++) {
401 int idx = i % FLAGS_benchmark_values;
402
403 if (idx == 0) {
404 StopBenchmarkTiming();
405 remove_keys.swap(add_keys);
406 random_shuffle(remove_keys.begin(), remove_keys.end(), rand);
407 random_shuffle(add_keys.begin(), add_keys.end(), rand);
408 StartBenchmarkTiming();
409 }
410
411 int e = container.erase(key_of_value(values[remove_keys[idx]]));
412 assert(e == 1);
413 container.insert(values[add_keys[idx]]);
414 }
415
416 StopBenchmarkTiming();
417}
418
419// Insertion at end, removal from the beginning. This benchmark
420// counts two value constructors.
421template <typename T>
422void BM_Fifo(int n) {
423 typedef typename std::remove_const<typename T::value_type>::type V;
424
425 // Disable timing while we perform some initialization.
426 StopBenchmarkTiming();
427
428 T container;
429 Generator<V> g(FLAGS_benchmark_values + FLAGS_benchmark_max_iters);
430
431 for (int i = 0; i < FLAGS_benchmark_values; i++) {
432 container.insert(g(i));
433 }
434
435 StartBenchmarkTiming();
436
437 for (int i = 0; i < n; i++) {
438 container.erase(container.begin());
439 container.insert(container.end(), g(i + FLAGS_benchmark_values));
440 }
441
442 StopBenchmarkTiming();
443}
444
445// Iteration (forward) through the tree
446template <typename T>
447void BM_FwdIter(int n) {
448 typedef typename std::remove_const<typename T::value_type>::type V;
449
450 // Disable timing while we perform some initialization.
451 StopBenchmarkTiming();
452
453 T container;
454 vector<V> values = GenerateValues<V>(FLAGS_benchmark_values);
455
456 for (int i = 0; i < FLAGS_benchmark_values; i++) {
457 container.insert(values[i]);
458 }
459
460 typename T::iterator iter;
461
462 V r = V();
463
464 StartBenchmarkTiming();
465
466 for (int i = 0; i < n; i++) {
467 int idx = i % FLAGS_benchmark_values;
468
469 if (idx == 0) {
470 iter = container.begin();
471 }
472 r = *iter;
473 ++iter;
474 }
475
476 StopBenchmarkTiming();
477
478 sink(r); // Keep compiler from optimizing away r.
479}
480
481typedef set<int32_t> stl_set_int32;
482typedef set<int64_t> stl_set_int64;
483typedef set<string> stl_set_string;
484
485typedef map<int32_t, intptr_t> stl_map_int32;
486typedef map<int64_t, intptr_t> stl_map_int64;
487typedef map<string, intptr_t> stl_map_string;
488
489typedef multiset<int32_t> stl_multiset_int32;
490typedef multiset<int64_t> stl_multiset_int64;
491typedef multiset<string> stl_multiset_string;
492
493typedef multimap<int32_t, intptr_t> stl_multimap_int32;
494typedef multimap<int64_t, intptr_t> stl_multimap_int64;
495typedef multimap<string, intptr_t> stl_multimap_string;
496
497#define MY_BENCHMARK_TYPES2(value, name, size) \
498 typedef btree ## _set<value, less<value>, allocator<value>, size> \
499 btree ## _ ## size ## _set_ ## name; \
500 typedef btree ## _map<value, int, less<value>, allocator<value>, size> \
501 btree ## _ ## size ## _map_ ## name; \
502 typedef btree ## _multiset<value, less<value>, allocator<value>, size> \
503 btree ## _ ## size ## _multiset_ ## name; \
504 typedef btree ## _multimap<value, int, less<value>, allocator<value>, size> \
505 btree ## _ ## size ## _multimap_ ## name
506
507#define MY_BENCHMARK_TYPES(value, name) \
508 MY_BENCHMARK_TYPES2(value, name, 128); \
509 MY_BENCHMARK_TYPES2(value, name, 160); \
510 MY_BENCHMARK_TYPES2(value, name, 192); \
511 MY_BENCHMARK_TYPES2(value, name, 224); \
512 MY_BENCHMARK_TYPES2(value, name, 256); \
513 MY_BENCHMARK_TYPES2(value, name, 288); \
514 MY_BENCHMARK_TYPES2(value, name, 320); \
515 MY_BENCHMARK_TYPES2(value, name, 352); \
516 MY_BENCHMARK_TYPES2(value, name, 384); \
517 MY_BENCHMARK_TYPES2(value, name, 416); \
518 MY_BENCHMARK_TYPES2(value, name, 448); \
519 MY_BENCHMARK_TYPES2(value, name, 480); \
520 MY_BENCHMARK_TYPES2(value, name, 512); \
521 MY_BENCHMARK_TYPES2(value, name, 1024); \
522 MY_BENCHMARK_TYPES2(value, name, 1536); \
523 MY_BENCHMARK_TYPES2(value, name, 2048)
524
525MY_BENCHMARK_TYPES(int32_t, int32);
526MY_BENCHMARK_TYPES(int64_t, int64);
527MY_BENCHMARK_TYPES(string, string);
528
529#define MY_BENCHMARK4(type, name, func) \
530 void BM_ ## type ## _ ## name(int n) { BM_ ## func <type>(n); } \
531 BTREE_BENCHMARK(BM_ ## type ## _ ## name)
532
533// Define NODESIZE_TESTING when running btree_perf.py.
534
535#ifdef NODESIZE_TESTING
536#define MY_BENCHMARK3(tree, type, name, func) \
537 MY_BENCHMARK4(tree ## _128_ ## type, name, func); \
538 MY_BENCHMARK4(tree ## _160_ ## type, name, func); \
539 MY_BENCHMARK4(tree ## _192_ ## type, name, func); \
540 MY_BENCHMARK4(tree ## _224_ ## type, name, func); \
541 MY_BENCHMARK4(tree ## _256_ ## type, name, func); \
542 MY_BENCHMARK4(tree ## _288_ ## type, name, func); \
543 MY_BENCHMARK4(tree ## _320_ ## type, name, func); \
544 MY_BENCHMARK4(tree ## _352_ ## type, name, func); \
545 MY_BENCHMARK4(tree ## _384_ ## type, name, func); \
546 MY_BENCHMARK4(tree ## _416_ ## type, name, func); \
547 MY_BENCHMARK4(tree ## _448_ ## type, name, func); \
548 MY_BENCHMARK4(tree ## _480_ ## type, name, func); \
549 MY_BENCHMARK4(tree ## _512_ ## type, name, func); \
550 MY_BENCHMARK4(tree ## _1024_ ## type, name, func); \
551 MY_BENCHMARK4(tree ## _1536_ ## type, name, func); \
552 MY_BENCHMARK4(tree ## _2048_ ## type, name, func)
553#else
554#define MY_BENCHMARK3(tree, type, name, func) \
555 MY_BENCHMARK4(tree ## _256_ ## type, name, func); \
556 MY_BENCHMARK4(tree ## _2048_ ## type, name, func)
557#endif
558
559#define MY_BENCHMARK2(type, name, func) \
560 MY_BENCHMARK4(stl_ ## type, name, func); \
561 MY_BENCHMARK3(btree, type, name, func)
562
563#define MY_BENCHMARK(type) \
564 MY_BENCHMARK2(type, insert, Insert); \
565 MY_BENCHMARK2(type, lookup, Lookup); \
566 MY_BENCHMARK2(type, fulllookup, FullLookup); \
567 MY_BENCHMARK2(type, delete, Delete); \
568 MY_BENCHMARK2(type, queueaddrem, QueueAddRem); \
569 MY_BENCHMARK2(type, mixedaddrem, MixedAddRem); \
570 MY_BENCHMARK2(type, fifo, Fifo); \
571 MY_BENCHMARK2(type, fwditer, FwdIter)
572
573MY_BENCHMARK(set_int32);
574MY_BENCHMARK(map_int32);
575MY_BENCHMARK(set_int64);
576MY_BENCHMARK(map_int64);
577MY_BENCHMARK(set_string);
578MY_BENCHMARK(map_string);
579
580MY_BENCHMARK(multiset_int32);
581MY_BENCHMARK(multimap_int32);
582MY_BENCHMARK(multiset_int64);
583MY_BENCHMARK(multimap_int64);
584MY_BENCHMARK(multiset_string);
585MY_BENCHMARK(multimap_string);
586
587} // namespace
588} // namespace btree
589
590int main(int argc, char **argv) {
591 btree::RunBenchmarks();
592 return 0;
593}
diff --git a/xdelta3/cpp-btree/btree_container.h b/xdelta3/cpp-btree/btree_container.h
new file mode 100644
index 0000000..fb617ab
--- /dev/null
+++ b/xdelta3/cpp-btree/btree_container.h
@@ -0,0 +1,349 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef UTIL_BTREE_BTREE_CONTAINER_H__
16#define UTIL_BTREE_BTREE_CONTAINER_H__
17
18#include <iosfwd>
19#include <utility>
20
21#include "btree.h"
22
23namespace btree {
24
// Base class shared by btree_set, btree_map, btree_multiset and
// btree_multimap.  It owns the underlying Tree and forwards every operation
// that does not depend on whether keys are unique.
template <typename Tree>
class btree_container {
  typedef btree_container<Tree> self_type;

 public:
  // Types re-exported from the underlying tree.
  typedef typename Tree::params_type params_type;
  typedef typename Tree::key_type key_type;
  typedef typename Tree::value_type value_type;
  typedef typename Tree::key_compare key_compare;
  typedef typename Tree::allocator_type allocator_type;
  typedef typename Tree::pointer pointer;
  typedef typename Tree::const_pointer const_pointer;
  typedef typename Tree::reference reference;
  typedef typename Tree::const_reference const_reference;
  typedef typename Tree::size_type size_type;
  typedef typename Tree::difference_type difference_type;
  typedef typename Tree::iterator iterator;
  typedef typename Tree::const_iterator const_iterator;
  typedef typename Tree::reverse_iterator reverse_iterator;
  typedef typename Tree::const_reverse_iterator const_reverse_iterator;

 public:
  // Builds the underlying tree from the given comparator and allocator.
  btree_container(const key_compare &comp, const allocator_type &alloc)
      : tree_(comp, alloc) {}

  // Copy constructor: copy-constructs the underlying tree from x's tree.
  btree_container(const self_type &x) : tree_(x.tree_) {}

  // Forward iteration.
  iterator begin() { return tree_.begin(); }
  const_iterator begin() const { return tree_.begin(); }
  iterator end() { return tree_.end(); }
  const_iterator end() const { return tree_.end(); }

  // Reverse iteration.
  reverse_iterator rbegin() { return tree_.rbegin(); }
  const_reverse_iterator rbegin() const { return tree_.rbegin(); }
  reverse_iterator rend() { return tree_.rend(); }
  const_reverse_iterator rend() const { return tree_.rend(); }

  // Bound lookups, forwarded unchanged to the tree.
  iterator lower_bound(const key_type &key) { return tree_.lower_bound(key); }
  const_iterator lower_bound(const key_type &key) const {
    return tree_.lower_bound(key);
  }
  iterator upper_bound(const key_type &key) { return tree_.upper_bound(key); }
  const_iterator upper_bound(const key_type &key) const {
    return tree_.upper_bound(key);
  }
  std::pair<iterator, iterator> equal_range(const key_type &key) {
    return tree_.equal_range(key);
  }
  std::pair<const_iterator, const_iterator> equal_range(
      const key_type &key) const {
    return tree_.equal_range(key);
  }

  // Utility routines.
  void clear() { tree_.clear(); }
  void swap(self_type &x) { tree_.swap(x.tree_); }
  void dump(std::ostream &os) const { tree_.dump(os); }
  void verify() const { tree_.verify(); }

  // Size and structure statistics, all answered by the tree.
  size_type size() const { return tree_.size(); }
  size_type max_size() const { return tree_.max_size(); }
  bool empty() const { return tree_.empty(); }
  size_type height() const { return tree_.height(); }
  size_type internal_nodes() const { return tree_.internal_nodes(); }
  size_type leaf_nodes() const { return tree_.leaf_nodes(); }
  size_type nodes() const { return tree_.nodes(); }
  size_type bytes_used() const { return tree_.bytes_used(); }
  static double average_bytes_per_value() {
    return Tree::average_bytes_per_value();
  }
  double fullness() const { return tree_.fullness(); }
  double overhead() const { return tree_.overhead(); }

  // Two containers are equal when they hold the same number of values and
  // the values compare equal pairwise in iteration order.
  bool operator==(const self_type& x) const {
    if (size() != x.size()) {
      return false;
    }
    // Sizes match, so walking this container to end() also stays within x.
    const_iterator mine = begin();
    const_iterator theirs = x.begin();
    while (mine != end()) {
      if (*mine != *theirs) {
        return false;
      }
      ++mine;
      ++theirs;
    }
    return true;
  }

  bool operator!=(const self_type& other) const {
    return !(*this == other);
  }

 protected:
  // The wrapped tree; derived classes reach it as this->tree_.
  Tree tree_;
};
138
139template <typename T>
140inline std::ostream& operator<<(std::ostream &os, const btree_container<T> &b) {
141 b.dump(os);
142 return os;
143}
144
145// A common base class for btree_set and safe_btree_set.
146template <typename Tree>
147class btree_unique_container : public btree_container<Tree> {
148 typedef btree_unique_container<Tree> self_type;
149 typedef btree_container<Tree> super_type;
150
151 public:
152 typedef typename Tree::key_type key_type;
153 typedef typename Tree::value_type value_type;
154 typedef typename Tree::size_type size_type;
155 typedef typename Tree::key_compare key_compare;
156 typedef typename Tree::allocator_type allocator_type;
157 typedef typename Tree::iterator iterator;
158 typedef typename Tree::const_iterator const_iterator;
159
160 public:
161 // Default constructor.
162 btree_unique_container(const key_compare &comp = key_compare(),
163 const allocator_type &alloc = allocator_type())
164 : super_type(comp, alloc) {
165 }
166
167 // Copy constructor.
168 btree_unique_container(const self_type &x)
169 : super_type(x) {
170 }
171
172 // Range constructor.
173 template <class InputIterator>
174 btree_unique_container(InputIterator b, InputIterator e,
175 const key_compare &comp = key_compare(),
176 const allocator_type &alloc = allocator_type())
177 : super_type(comp, alloc) {
178 insert(b, e);
179 }
180
181 // Lookup routines.
182 iterator find(const key_type &key) {
183 return this->tree_.find_unique(key);
184 }
185 const_iterator find(const key_type &key) const {
186 return this->tree_.find_unique(key);
187 }
188 size_type count(const key_type &key) const {
189 return this->tree_.count_unique(key);
190 }
191
192 // Insertion routines.
193 std::pair<iterator,bool> insert(const value_type &x) {
194 return this->tree_.insert_unique(x);
195 }
196 iterator insert(iterator position, const value_type &x) {
197 return this->tree_.insert_unique(position, x);
198 }
199 template <typename InputIterator>
200 void insert(InputIterator b, InputIterator e) {
201 this->tree_.insert_unique(b, e);
202 }
203
204 // Deletion routines.
205 int erase(const key_type &key) {
206 return this->tree_.erase_unique(key);
207 }
208 // Erase the specified iterator from the btree. The iterator must be valid
209 // (i.e. not equal to end()). Return an iterator pointing to the node after
210 // the one that was erased (or end() if none exists).
211 iterator erase(const iterator &iter) {
212 return this->tree_.erase(iter);
213 }
214 void erase(const iterator &first, const iterator &last) {
215 this->tree_.erase(first, last);
216 }
217};
218
219// A common base class for btree_map and safe_btree_map.
220template <typename Tree>
221class btree_map_container : public btree_unique_container<Tree> {
222 typedef btree_map_container<Tree> self_type;
223 typedef btree_unique_container<Tree> super_type;
224
225 public:
226 typedef typename Tree::key_type key_type;
227 typedef typename Tree::data_type data_type;
228 typedef typename Tree::value_type value_type;
229 typedef typename Tree::mapped_type mapped_type;
230 typedef typename Tree::key_compare key_compare;
231 typedef typename Tree::allocator_type allocator_type;
232
233 private:
234 // A pointer-like object which only generates its value when
235 // dereferenced. Used by operator[] to avoid constructing an empty data_type
236 // if the key already exists in the map.
237 struct generate_value {
238 generate_value(const key_type &k)
239 : key(k) {
240 }
241 value_type operator*() const {
242 return std::make_pair(key, data_type());
243 }
244 const key_type &key;
245 };
246
247 public:
248 // Default constructor.
249 btree_map_container(const key_compare &comp = key_compare(),
250 const allocator_type &alloc = allocator_type())
251 : super_type(comp, alloc) {
252 }
253
254 // Copy constructor.
255 btree_map_container(const self_type &x)
256 : super_type(x) {
257 }
258
259 // Range constructor.
260 template <class InputIterator>
261 btree_map_container(InputIterator b, InputIterator e,
262 const key_compare &comp = key_compare(),
263 const allocator_type &alloc = allocator_type())
264 : super_type(b, e, comp, alloc) {
265 }
266
267 // Insertion routines.
268 data_type& operator[](const key_type &key) {
269 return this->tree_.insert_unique(key, generate_value(key)).first->second;
270 }
271};
272
273// A common base class for btree_multiset and btree_multimap.
274template <typename Tree>
275class btree_multi_container : public btree_container<Tree> {
276 typedef btree_multi_container<Tree> self_type;
277 typedef btree_container<Tree> super_type;
278
279 public:
280 typedef typename Tree::key_type key_type;
281 typedef typename Tree::value_type value_type;
282 typedef typename Tree::size_type size_type;
283 typedef typename Tree::key_compare key_compare;
284 typedef typename Tree::allocator_type allocator_type;
285 typedef typename Tree::iterator iterator;
286 typedef typename Tree::const_iterator const_iterator;
287
288 public:
289 // Default constructor.
290 btree_multi_container(const key_compare &comp = key_compare(),
291 const allocator_type &alloc = allocator_type())
292 : super_type(comp, alloc) {
293 }
294
295 // Copy constructor.
296 btree_multi_container(const self_type &x)
297 : super_type(x) {
298 }
299
300 // Range constructor.
301 template <class InputIterator>
302 btree_multi_container(InputIterator b, InputIterator e,
303 const key_compare &comp = key_compare(),
304 const allocator_type &alloc = allocator_type())
305 : super_type(comp, alloc) {
306 insert(b, e);
307 }
308
309 // Lookup routines.
310 iterator find(const key_type &key) {
311 return this->tree_.find_multi(key);
312 }
313 const_iterator find(const key_type &key) const {
314 return this->tree_.find_multi(key);
315 }
316 size_type count(const key_type &key) const {
317 return this->tree_.count_multi(key);
318 }
319
320 // Insertion routines.
321 iterator insert(const value_type &x) {
322 return this->tree_.insert_multi(x);
323 }
324 iterator insert(iterator position, const value_type &x) {
325 return this->tree_.insert_multi(position, x);
326 }
327 template <typename InputIterator>
328 void insert(InputIterator b, InputIterator e) {
329 this->tree_.insert_multi(b, e);
330 }
331
332 // Deletion routines.
333 int erase(const key_type &key) {
334 return this->tree_.erase_multi(key);
335 }
336 // Erase the specified iterator from the btree. The iterator must be valid
337 // (i.e. not equal to end()). Return an iterator pointing to the node after
338 // the one that was erased (or end() if none exists).
339 iterator erase(const iterator &iter) {
340 return this->tree_.erase(iter);
341 }
342 void erase(const iterator &first, const iterator &last) {
343 this->tree_.erase(first, last);
344 }
345};
346
347} // namespace btree
348
349#endif // UTIL_BTREE_BTREE_CONTAINER_H__
diff --git a/xdelta3/cpp-btree/btree_map.h b/xdelta3/cpp-btree/btree_map.h
new file mode 100644
index 0000000..b83489f
--- /dev/null
+++ b/xdelta3/cpp-btree/btree_map.h
@@ -0,0 +1,130 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// A btree_map<> implements the STL unique sorted associative container
16// interface and the pair associative container interface (a.k.a map<>) using a
17// btree. A btree_multimap<> implements the STL multiple sorted associative
18// container interface and the pair associative container interface (a.k.a
19// multimap<>) using a btree. See btree.h for details of the btree
20// implementation and caveats.
21
22#ifndef UTIL_BTREE_BTREE_MAP_H__
23#define UTIL_BTREE_BTREE_MAP_H__
24
25#include <algorithm>
26#include <functional>
27#include <memory>
28#include <string>
29#include <utility>
30
31#include "btree.h"
32#include "btree_container.h"
33
34namespace btree {
35
36// The btree_map class is needed mainly for its constructors.
37template <typename Key, typename Value,
38 typename Compare = std::less<Key>,
39 typename Alloc = std::allocator<std::pair<const Key, Value> >,
40 int TargetNodeSize = 256>
41class btree_map : public btree_map_container<
42 btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
43
44 typedef btree_map<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
45 typedef btree_map_params<
46 Key, Value, Compare, Alloc, TargetNodeSize> params_type;
47 typedef btree<params_type> btree_type;
48 typedef btree_map_container<btree_type> super_type;
49
50 public:
51 typedef typename btree_type::key_compare key_compare;
52 typedef typename btree_type::allocator_type allocator_type;
53
54 public:
55 // Default constructor.
56 btree_map(const key_compare &comp = key_compare(),
57 const allocator_type &alloc = allocator_type())
58 : super_type(comp, alloc) {
59 }
60
61 // Copy constructor.
62 btree_map(const self_type &x)
63 : super_type(x) {
64 }
65
66 // Range constructor.
67 template <class InputIterator>
68 btree_map(InputIterator b, InputIterator e,
69 const key_compare &comp = key_compare(),
70 const allocator_type &alloc = allocator_type())
71 : super_type(b, e, comp, alloc) {
72 }
73};
74
75template <typename K, typename V, typename C, typename A, int N>
76inline void swap(btree_map<K, V, C, A, N> &x,
77 btree_map<K, V, C, A, N> &y) {
78 x.swap(y);
79}
80
81// The btree_multimap class is needed mainly for its constructors.
82template <typename Key, typename Value,
83 typename Compare = std::less<Key>,
84 typename Alloc = std::allocator<std::pair<const Key, Value> >,
85 int TargetNodeSize = 256>
86class btree_multimap : public btree_multi_container<
87 btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
88
89 typedef btree_multimap<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
90 typedef btree_map_params<
91 Key, Value, Compare, Alloc, TargetNodeSize> params_type;
92 typedef btree<params_type> btree_type;
93 typedef btree_multi_container<btree_type> super_type;
94
95 public:
96 typedef typename btree_type::key_compare key_compare;
97 typedef typename btree_type::allocator_type allocator_type;
98 typedef typename btree_type::data_type data_type;
99 typedef typename btree_type::mapped_type mapped_type;
100
101 public:
102 // Default constructor.
103 btree_multimap(const key_compare &comp = key_compare(),
104 const allocator_type &alloc = allocator_type())
105 : super_type(comp, alloc) {
106 }
107
108 // Copy constructor.
109 btree_multimap(const self_type &x)
110 : super_type(x) {
111 }
112
113 // Range constructor.
114 template <class InputIterator>
115 btree_multimap(InputIterator b, InputIterator e,
116 const key_compare &comp = key_compare(),
117 const allocator_type &alloc = allocator_type())
118 : super_type(b, e, comp, alloc) {
119 }
120};
121
122template <typename K, typename V, typename C, typename A, int N>
123inline void swap(btree_multimap<K, V, C, A, N> &x,
124 btree_multimap<K, V, C, A, N> &y) {
125 x.swap(y);
126}
127
128} // namespace btree
129
130#endif // UTIL_BTREE_BTREE_MAP_H__
diff --git a/xdelta3/cpp-btree/btree_set.h b/xdelta3/cpp-btree/btree_set.h
new file mode 100644
index 0000000..f9b2e75
--- /dev/null
+++ b/xdelta3/cpp-btree/btree_set.h
@@ -0,0 +1,121 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// A btree_set<> implements the STL unique sorted associative container
16// interface (a.k.a set<>) using a btree. A btree_multiset<> implements the STL
17// multiple sorted associative container interface (a.k.a multiset<>) using a
18// btree. See btree.h for details of the btree implementation and caveats.
19
20#ifndef UTIL_BTREE_BTREE_SET_H__
21#define UTIL_BTREE_BTREE_SET_H__
22
23#include <functional>
24#include <memory>
25#include <string>
26
27#include "btree.h"
28#include "btree_container.h"
29
30namespace btree {
31
32// The btree_set class is needed mainly for its constructors.
33template <typename Key,
34 typename Compare = std::less<Key>,
35 typename Alloc = std::allocator<Key>,
36 int TargetNodeSize = 256>
37class btree_set : public btree_unique_container<
38 btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > > {
39
40 typedef btree_set<Key, Compare, Alloc, TargetNodeSize> self_type;
41 typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
42 typedef btree<params_type> btree_type;
43 typedef btree_unique_container<btree_type> super_type;
44
45 public:
46 typedef typename btree_type::key_compare key_compare;
47 typedef typename btree_type::allocator_type allocator_type;
48
49 public:
50 // Default constructor.
51 btree_set(const key_compare &comp = key_compare(),
52 const allocator_type &alloc = allocator_type())
53 : super_type(comp, alloc) {
54 }
55
56 // Copy constructor.
57 btree_set(const self_type &x)
58 : super_type(x) {
59 }
60
61 // Range constructor.
62 template <class InputIterator>
63 btree_set(InputIterator b, InputIterator e,
64 const key_compare &comp = key_compare(),
65 const allocator_type &alloc = allocator_type())
66 : super_type(b, e, comp, alloc) {
67 }
68};
69
70template <typename K, typename C, typename A, int N>
71inline void swap(btree_set<K, C, A, N> &x, btree_set<K, C, A, N> &y) {
72 x.swap(y);
73}
74
75// The btree_multiset class is needed mainly for its constructors.
76template <typename Key,
77 typename Compare = std::less<Key>,
78 typename Alloc = std::allocator<Key>,
79 int TargetNodeSize = 256>
80class btree_multiset : public btree_multi_container<
81 btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > > {
82
83 typedef btree_multiset<Key, Compare, Alloc, TargetNodeSize> self_type;
84 typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
85 typedef btree<params_type> btree_type;
86 typedef btree_multi_container<btree_type> super_type;
87
88 public:
89 typedef typename btree_type::key_compare key_compare;
90 typedef typename btree_type::allocator_type allocator_type;
91
92 public:
93 // Default constructor.
94 btree_multiset(const key_compare &comp = key_compare(),
95 const allocator_type &alloc = allocator_type())
96 : super_type(comp, alloc) {
97 }
98
99 // Copy constructor.
100 btree_multiset(const self_type &x)
101 : super_type(x) {
102 }
103
104 // Range constructor.
105 template <class InputIterator>
106 btree_multiset(InputIterator b, InputIterator e,
107 const key_compare &comp = key_compare(),
108 const allocator_type &alloc = allocator_type())
109 : super_type(b, e, comp, alloc) {
110 }
111};
112
113template <typename K, typename C, typename A, int N>
114inline void swap(btree_multiset<K, C, A, N> &x,
115 btree_multiset<K, C, A, N> &y) {
116 x.swap(y);
117}
118
119} // namespace btree
120
121#endif // UTIL_BTREE_BTREE_SET_H__
diff --git a/xdelta3/cpp-btree/btree_test.cc b/xdelta3/cpp-btree/btree_test.cc
new file mode 100644
index 0000000..6b1837d
--- /dev/null
+++ b/xdelta3/cpp-btree/btree_test.cc
@@ -0,0 +1,270 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "gtest/gtest.h"
16#include "btree_map.h"
17#include "btree_set.h"
18#include "btree_test.h"
19
20namespace btree {
21namespace {
22
23template <typename K, int N>
24void SetTest() {
25 typedef TestAllocator<K> TestAlloc;
26 ASSERT_EQ(sizeof(btree_set<K>), sizeof(void*));
27 BtreeTest<btree_set<K, std::less<K>, std::allocator<K>, N>, std::set<K> >();
28 BtreeAllocatorTest<btree_set<K, std::less<K>, TestAlloc, N> >();
29}
30
31template <typename K, int N>
32void MapTest() {
33 typedef TestAllocator<K> TestAlloc;
34 ASSERT_EQ(sizeof(btree_map<K, K>), sizeof(void*));
35 BtreeTest<btree_map<K, K, std::less<K>, std::allocator<K>, N>, std::map<K, K> >();
36 BtreeAllocatorTest<btree_map<K, K, std::less<K>, TestAlloc, N> >();
37 BtreeMapTest<btree_map<K, K, std::less<K>, std::allocator<K>, N> >();
38}
39
40TEST(Btree, set_int32_32) { SetTest<int32_t, 32>(); }
41TEST(Btree, set_int32_64) { SetTest<int32_t, 64>(); }
42TEST(Btree, set_int32_128) { SetTest<int32_t, 128>(); }
43TEST(Btree, set_int32_256) { SetTest<int32_t, 256>(); }
44TEST(Btree, set_int64_256) { SetTest<int64_t, 256>(); }
45TEST(Btree, set_string_256) { SetTest<std::string, 256>(); }
46TEST(Btree, set_pair_256) { SetTest<std::pair<int, int>, 256>(); }
47TEST(Btree, map_int32_256) { MapTest<int32_t, 256>(); }
48TEST(Btree, map_int64_256) { MapTest<int64_t, 256>(); }
49TEST(Btree, map_string_256) { MapTest<std::string, 256>(); }
50TEST(Btree, map_pair_256) { MapTest<std::pair<int, int>, 256>(); }
51
52// Large-node tests
53TEST(Btree, map_int32_1024) { MapTest<int32_t, 1024>(); }
54TEST(Btree, map_int32_1032) { MapTest<int32_t, 1032>(); }
55TEST(Btree, map_int32_1040) { MapTest<int32_t, 1040>(); }
56TEST(Btree, map_int32_1048) { MapTest<int32_t, 1048>(); }
57TEST(Btree, map_int32_1056) { MapTest<int32_t, 1056>(); }
58
59TEST(Btree, map_int32_2048) { MapTest<int32_t, 2048>(); }
60TEST(Btree, map_int32_4096) { MapTest<int32_t, 4096>(); }
61TEST(Btree, set_int32_1024) { SetTest<int32_t, 1024>(); }
62TEST(Btree, set_int32_2048) { SetTest<int32_t, 2048>(); }
63TEST(Btree, set_int32_4096) { SetTest<int32_t, 4096>(); }
64TEST(Btree, map_string_1024) { MapTest<std::string, 1024>(); }
65TEST(Btree, map_string_2048) { MapTest<std::string, 2048>(); }
66TEST(Btree, map_string_4096) { MapTest<std::string, 4096>(); }
67TEST(Btree, set_string_1024) { SetTest<std::string, 1024>(); }
68TEST(Btree, set_string_2048) { SetTest<std::string, 2048>(); }
69TEST(Btree, set_string_4096) { SetTest<std::string, 4096>(); }
70
71template <typename K, int N>
72void MultiSetTest() {
73 typedef TestAllocator<K> TestAlloc;
74 ASSERT_EQ(sizeof(btree_multiset<K>), sizeof(void*));
75 BtreeMultiTest<btree_multiset<K, std::less<K>, std::allocator<K>, N>,
76 std::multiset<K> >();
77 BtreeAllocatorTest<btree_multiset<K, std::less<K>, TestAlloc, N> >();
78}
79
80template <typename K, int N>
81void MultiMapTest() {
82 typedef TestAllocator<K> TestAlloc;
83 ASSERT_EQ(sizeof(btree_multimap<K, K>), sizeof(void*));
84 BtreeMultiTest<btree_multimap<K, K, std::less<K>, std::allocator<K>, N>,
85 std::multimap<K, K> >();
86 BtreeMultiMapTest<btree_multimap<K, K, std::less<K>, std::allocator<K>, N> >();
87 BtreeAllocatorTest<btree_multimap<K, K, std::less<K>, TestAlloc, N> >();
88}
89
90TEST(Btree, multiset_int32_256) { MultiSetTest<int32_t, 256>(); }
91TEST(Btree, multiset_int64_256) { MultiSetTest<int64_t, 256>(); }
92TEST(Btree, multiset_string_256) { MultiSetTest<std::string, 256>(); }
93TEST(Btree, multiset_pair_256) { MultiSetTest<std::pair<int, int>, 256>(); }
94TEST(Btree, multimap_int32_256) { MultiMapTest<int32_t, 256>(); }
95TEST(Btree, multimap_int64_256) { MultiMapTest<int64_t, 256>(); }
96TEST(Btree, multimap_string_256) { MultiMapTest<std::string, 256>(); }
97TEST(Btree, multimap_pair_256) { MultiMapTest<std::pair<int, int>, 256>(); }
98
99// Large-node tests
100TEST(Btree, multimap_int32_1024) { MultiMapTest<int32_t, 1024>(); }
101TEST(Btree, multimap_int32_2048) { MultiMapTest<int32_t, 2048>(); }
102TEST(Btree, multimap_int32_4096) { MultiMapTest<int32_t, 4096>(); }
103TEST(Btree, multiset_int32_1024) { MultiSetTest<int32_t, 1024>(); }
104TEST(Btree, multiset_int32_2048) { MultiSetTest<int32_t, 2048>(); }
105TEST(Btree, multiset_int32_4096) { MultiSetTest<int32_t, 4096>(); }
106TEST(Btree, multimap_string_1024) { MultiMapTest<std::string, 1024>(); }
107TEST(Btree, multimap_string_2048) { MultiMapTest<std::string, 2048>(); }
108TEST(Btree, multimap_string_4096) { MultiMapTest<std::string, 4096>(); }
109TEST(Btree, multiset_string_1024) { MultiSetTest<std::string, 1024>(); }
110TEST(Btree, multiset_string_2048) { MultiSetTest<std::string, 2048>(); }
111TEST(Btree, multiset_string_4096) { MultiSetTest<std::string, 4096>(); }
112
113// Verify that swapping btrees swaps the key comparison functors.
// Stateful comparator that orders strings by their first n characters only
// (n defaults to 2), so strings sharing an n-character prefix are equivalent.
struct SubstringLess {
  SubstringLess() : n(2) {}
  SubstringLess(size_t length) : n(length) {}

  // substr(0, n) clamps to the string's length, so short strings are
  // compared in full.
  bool operator()(const std::string &a, const std::string &b) const {
    return a.substr(0, n) < b.substr(0, n);
  }

  size_t n;
};
126
127TEST(Btree, SwapKeyCompare) {
128 typedef btree_set<std::string, SubstringLess> SubstringSet;
129 SubstringSet s1(SubstringLess(1), SubstringSet::allocator_type());
130 SubstringSet s2(SubstringLess(2), SubstringSet::allocator_type());
131
132 ASSERT_TRUE(s1.insert("a").second);
133 ASSERT_FALSE(s1.insert("aa").second);
134
135 ASSERT_TRUE(s2.insert("a").second);
136 ASSERT_TRUE(s2.insert("aa").second);
137 ASSERT_FALSE(s2.insert("aaa").second);
138
139 swap(s1, s2);
140
141 ASSERT_TRUE(s1.insert("b").second);
142 ASSERT_TRUE(s1.insert("bb").second);
143 ASSERT_FALSE(s1.insert("bbb").second);
144
145 ASSERT_TRUE(s2.insert("b").second);
146 ASSERT_FALSE(s2.insert("bb").second);
147}
148
149TEST(Btree, UpperBoundRegression) {
150 // Regress a bug where upper_bound would default-construct a new key_compare
151 // instead of copying the existing one.
152 typedef btree_set<std::string, SubstringLess> SubstringSet;
153 SubstringSet my_set(SubstringLess(3));
154 my_set.insert("aab");
155 my_set.insert("abb");
156 // We call upper_bound("aaa"). If this correctly uses the length 3
157 // comparator, aaa < aab < abb, so we should get aab as the result.
158 // If it instead uses the default-constructed length 2 comparator,
159 // aa == aa < ab, so we'll get abb as our result.
160 SubstringSet::iterator it = my_set.upper_bound("aaa");
161 ASSERT_TRUE(it != my_set.end());
162 EXPECT_EQ("aab", *it);
163}
164
165
166TEST(Btree, IteratorIncrementBy) {
167 // Test that increment_by returns the same position as increment.
168 const int kSetSize = 2341;
169 btree_set<int32_t> my_set;
170 for (int i = 0; i < kSetSize; ++i) {
171 my_set.insert(i);
172 }
173
174 {
175 // Simple increment vs. increment by.
176 btree_set<int32_t>::iterator a = my_set.begin();
177 btree_set<int32_t>::iterator b = my_set.begin();
178 a.increment();
179 b.increment_by(1);
180 EXPECT_EQ(*a, *b);
181 }
182
183 btree_set<int32_t>::iterator a = my_set.begin();
184 for (int i = 1; i < kSetSize; ++i) {
185 ++a;
186 // increment_by
187 btree_set<int32_t>::iterator b = my_set.begin();
188 b.increment_by(i);
189 EXPECT_EQ(*a, *b) << ": i=" << i;
190 }
191}
192
193TEST(Btree, Comparison) {
194 const int kSetSize = 1201;
195 btree_set<int64_t> my_set;
196 for (int i = 0; i < kSetSize; ++i) {
197 my_set.insert(i);
198 }
199 btree_set<int64_t> my_set_copy(my_set);
200 EXPECT_TRUE(my_set_copy == my_set);
201 EXPECT_TRUE(my_set == my_set_copy);
202 EXPECT_FALSE(my_set_copy != my_set);
203 EXPECT_FALSE(my_set != my_set_copy);
204
205 my_set.insert(kSetSize);
206 EXPECT_FALSE(my_set_copy == my_set);
207 EXPECT_FALSE(my_set == my_set_copy);
208 EXPECT_TRUE(my_set_copy != my_set);
209 EXPECT_TRUE(my_set != my_set_copy);
210
211 my_set.erase(kSetSize - 1);
212 EXPECT_FALSE(my_set_copy == my_set);
213 EXPECT_FALSE(my_set == my_set_copy);
214 EXPECT_TRUE(my_set_copy != my_set);
215 EXPECT_TRUE(my_set != my_set_copy);
216
217 btree_map<std::string, int64_t> my_map;
218 for (int i = 0; i < kSetSize; ++i) {
219 my_map[std::string(i, 'a')] = i;
220 }
221 btree_map<std::string, int64_t> my_map_copy(my_map);
222 EXPECT_TRUE(my_map_copy == my_map);
223 EXPECT_TRUE(my_map == my_map_copy);
224 EXPECT_FALSE(my_map_copy != my_map);
225 EXPECT_FALSE(my_map != my_map_copy);
226
227 ++my_map_copy[std::string(7, 'a')];
228 EXPECT_FALSE(my_map_copy == my_map);
229 EXPECT_FALSE(my_map == my_map_copy);
230 EXPECT_TRUE(my_map_copy != my_map);
231 EXPECT_TRUE(my_map != my_map_copy);
232
233 my_map_copy = my_map;
234 my_map["hello"] = kSetSize;
235 EXPECT_FALSE(my_map_copy == my_map);
236 EXPECT_FALSE(my_map == my_map_copy);
237 EXPECT_TRUE(my_map_copy != my_map);
238 EXPECT_TRUE(my_map != my_map_copy);
239
240 my_map.erase(std::string(kSetSize - 1, 'a'));
241 EXPECT_FALSE(my_map_copy == my_map);
242 EXPECT_FALSE(my_map == my_map_copy);
243 EXPECT_TRUE(my_map_copy != my_map);
244 EXPECT_TRUE(my_map != my_map_copy);
245}
246
247TEST(Btree, RangeCtorSanity) {
248 typedef btree_set<int, std::less<int>, std::allocator<int>, 256> test_set;
249 typedef btree_map<int, int, std::less<int>, std::allocator<int>, 256>
250 test_map;
251 typedef btree_multiset<int, std::less<int>, std::allocator<int>, 256>
252 test_mset;
253 typedef btree_multimap<int, int, std::less<int>, std::allocator<int>, 256>
254 test_mmap;
255 std::vector<int> ivec;
256 ivec.push_back(1);
257 std::map<int, int> imap;
258 imap.insert(std::make_pair(1, 2));
259 test_mset tmset(ivec.begin(), ivec.end());
260 test_mmap tmmap(imap.begin(), imap.end());
261 test_set tset(ivec.begin(), ivec.end());
262 test_map tmap(imap.begin(), imap.end());
263 EXPECT_EQ(1, tmset.size());
264 EXPECT_EQ(1, tmmap.size());
265 EXPECT_EQ(1, tset.size());
266 EXPECT_EQ(1, tmap.size());
267}
268
269} // namespace
270} // namespace btree
diff --git a/xdelta3/cpp-btree/btree_test.h b/xdelta3/cpp-btree/btree_test.h
new file mode 100644
index 0000000..413dc3c
--- /dev/null
+++ b/xdelta3/cpp-btree/btree_test.h
@@ -0,0 +1,940 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef UTIL_BTREE_BTREE_TEST_H__
16#define UTIL_BTREE_BTREE_TEST_H__
17
18#include <stdio.h>
19#include <algorithm>
20#include <functional>
21#include <type_traits>
22#include <iosfwd>
23#include <map>
24#include <set>
25#include <sstream>
26#include <string>
27#include <utility>
28#include <vector>
29
30#include "gtest/gtest.h"
31#include "gflags/gflags.h"
32#include "btree_container.h"
33
34DECLARE_int32(test_values);
35DECLARE_int32(benchmark_values);
36
namespace std {

// Stream insertion for std::pair<T, U>: prints "(first,second)".
template <typename T, typename U>
ostream& operator<<(ostream &os, const std::pair<T, U> &p) {
  os << "(" << p.first;
  os << "," << p.second << ")";
  return os;
}

// Equality between pairs of possibly different element types. It only needs
// x.first to be comparable to y.first and x.second to y.second; the tests use
// it to compare std::pair<T, U> with std::pair<const T, U>.
template <typename T, typename U, typename V, typename W>
bool operator==(const std::pair<T, U> &x, const std::pair<V, W> &y) {
  if (!(x.first == y.first)) {
    return false;
  }
  return x.second == y.second;
}

// Partial specialization of remove_const that strips const from both members
// of a std::pair instead of from the pair type itself.
template <typename T, typename U>
struct remove_const<pair<T, U> > {
  typedef pair<typename remove_const<T>::type,
               typename remove_const<U>::type> type;
};

} // namespace std
63
64namespace btree {
65
// Function object that returns a const reference to the first member of a
// pair.
//
// The argument_type/result_type typedefs are declared directly rather than
// inherited from std::unary_function, which was deprecated in C++11 and
// removed in C++17. The template parameter and argument no longer use
// identifiers reserved for the implementation (leading underscore + capital,
// double underscore).
template <class Pair>
struct select1st {
  typedef Pair argument_type;
  typedef typename Pair::first_type result_type;

  const typename Pair::first_type& operator()(const Pair& x) const {
    return x.first;
  }
};
73
74// Utility class to provide an accessor for a key given a value. The default
75// behavior is to treat the value as a pair and return the first element.
76template <typename K, typename V>
77struct KeyOfValue {
78 typedef select1st<V> type;
79};
80
// Function object that returns its argument unchanged, by const reference.
template <typename T>
struct identity {
  const T& operator()(const T& t) const {
    return t;
  }
};
85
86// Partial specialization of KeyOfValue class for when the key and value are
87// the same type such as in set<> and btree_set<>.
88template <typename K>
89struct KeyOfValue<K, K> {
90 typedef identity<K> type;
91};
92
// Counts the number of occurrences of "c" in the buffer [buf_begin, buf_end).
// Returns 0 for a NULL buffer or an empty/reversed range.
inline ptrdiff_t strcount(const char* buf_begin, const char* buf_end, char c) {
  if (buf_begin == NULL || buf_end <= buf_begin) {
    return 0;
  }
  return std::count(buf_begin, buf_end, c);
}

// Length-based variant, for when the string is not null-terminated.
inline ptrdiff_t strcount(const char* buf, size_t len, char c) {
  const char* const buf_end = buf + len;
  return strcount(buf, buf_end, c);
}

// Counts the occurrences of "c" in a std::string.
inline ptrdiff_t strcount(const std::string& buf, char c) {
  const size_t len = buf.size();
  return strcount(buf.c_str(), len, c);
}
115
116// The base class for a sorted associative container checker. TreeType is the
117// container type to check and CheckerType is the container type to check
118// against. TreeType is expected to be btree_{set,map,multiset,multimap} and
119// CheckerType is expected to be {set,map,multiset,multimap}.
120template <typename TreeType, typename CheckerType>
121class base_checker {
122 typedef base_checker<TreeType, CheckerType> self_type;
123
124 public:
125 typedef typename TreeType::key_type key_type;
126 typedef typename TreeType::value_type value_type;
127 typedef typename TreeType::key_compare key_compare;
128 typedef typename TreeType::pointer pointer;
129 typedef typename TreeType::const_pointer const_pointer;
130 typedef typename TreeType::reference reference;
131 typedef typename TreeType::const_reference const_reference;
132 typedef typename TreeType::size_type size_type;
133 typedef typename TreeType::difference_type difference_type;
134 typedef typename TreeType::iterator iterator;
135 typedef typename TreeType::const_iterator const_iterator;
136 typedef typename TreeType::reverse_iterator reverse_iterator;
137 typedef typename TreeType::const_reverse_iterator const_reverse_iterator;
138
139 public:
140 // Default constructor.
141 base_checker()
142 : const_tree_(tree_) {
143 }
144 // Copy constructor.
145 base_checker(const self_type &x)
146 : tree_(x.tree_),
147 const_tree_(tree_),
148 checker_(x.checker_) {
149 }
150 // Range constructor.
151 template <typename InputIterator>
152 base_checker(InputIterator b, InputIterator e)
153 : tree_(b, e),
154 const_tree_(tree_),
155 checker_(b, e) {
156 }
157
158 // Iterator routines.
159 iterator begin() { return tree_.begin(); }
160 const_iterator begin() const { return tree_.begin(); }
161 iterator end() { return tree_.end(); }
162 const_iterator end() const { return tree_.end(); }
163 reverse_iterator rbegin() { return tree_.rbegin(); }
164 const_reverse_iterator rbegin() const { return tree_.rbegin(); }
165 reverse_iterator rend() { return tree_.rend(); }
166 const_reverse_iterator rend() const { return tree_.rend(); }
167
168 // Helper routines.
169 template <typename IterType, typename CheckerIterType>
170 IterType iter_check(
171 IterType tree_iter, CheckerIterType checker_iter) const {
172 if (tree_iter == tree_.end()) {
173 EXPECT_EQ(checker_iter, checker_.end());
174 } else {
175 EXPECT_EQ(*tree_iter, *checker_iter);
176 }
177 return tree_iter;
178 }
179 template <typename IterType, typename CheckerIterType>
180 IterType riter_check(
181 IterType tree_iter, CheckerIterType checker_iter) const {
182 if (tree_iter == tree_.rend()) {
183 EXPECT_EQ(checker_iter, checker_.rend());
184 } else {
185 EXPECT_EQ(*tree_iter, *checker_iter);
186 }
187 return tree_iter;
188 }
189 void value_check(const value_type &x) {
190 typename KeyOfValue<typename TreeType::key_type,
191 typename TreeType::value_type>::type key_of_value;
192 const key_type &key = key_of_value(x);
193 EXPECT_EQ(*find(key), x);
194 lower_bound(key);
195 upper_bound(key);
196 equal_range(key);
197 count(key);
198 }
199 void erase_check(const key_type &key) {
200 EXPECT_TRUE(tree_.find(key) == const_tree_.end());
201 EXPECT_TRUE(const_tree_.find(key) == tree_.end());
202 EXPECT_TRUE(tree_.equal_range(key).first ==
203 const_tree_.equal_range(key).second);
204 }
205
206 // Lookup routines.
207 iterator lower_bound(const key_type &key) {
208 return iter_check(tree_.lower_bound(key), checker_.lower_bound(key));
209 }
210 const_iterator lower_bound(const key_type &key) const {
211 return iter_check(tree_.lower_bound(key), checker_.lower_bound(key));
212 }
213 iterator upper_bound(const key_type &key) {
214 return iter_check(tree_.upper_bound(key), checker_.upper_bound(key));
215 }
216 const_iterator upper_bound(const key_type &key) const {
217 return iter_check(tree_.upper_bound(key), checker_.upper_bound(key));
218 }
219 std::pair<iterator,iterator> equal_range(const key_type &key) {
220 std::pair<typename CheckerType::iterator,
221 typename CheckerType::iterator> checker_res =
222 checker_.equal_range(key);
223 std::pair<iterator, iterator> tree_res = tree_.equal_range(key);
224 iter_check(tree_res.first, checker_res.first);
225 iter_check(tree_res.second, checker_res.second);
226 return tree_res;
227 }
228 std::pair<const_iterator,const_iterator> equal_range(const key_type &key) const {
229 std::pair<typename CheckerType::const_iterator,
230 typename CheckerType::const_iterator> checker_res =
231 checker_.equal_range(key);
232 std::pair<const_iterator, const_iterator> tree_res = tree_.equal_range(key);
233 iter_check(tree_res.first, checker_res.first);
234 iter_check(tree_res.second, checker_res.second);
235 return tree_res;
236 }
237 iterator find(const key_type &key) {
238 return iter_check(tree_.find(key), checker_.find(key));
239 }
240 const_iterator find(const key_type &key) const {
241 return iter_check(tree_.find(key), checker_.find(key));
242 }
243 size_type count(const key_type &key) const {
244 size_type res = checker_.count(key);
245 EXPECT_EQ(res, tree_.count(key));
246 return res;
247 }
248
249 // Assignment operator.
250 self_type& operator=(const self_type &x) {
251 tree_ = x.tree_;
252 checker_ = x.checker_;
253 return *this;
254 }
255
256 // Deletion routines.
257 int erase(const key_type &key) {
258 int size = tree_.size();
259 int res = checker_.erase(key);
260 EXPECT_EQ(res, tree_.count(key));
261 EXPECT_EQ(res, tree_.erase(key));
262 EXPECT_EQ(tree_.count(key), 0);
263 EXPECT_EQ(tree_.size(), size - res);
264 erase_check(key);
265 return res;
266 }
267 iterator erase(iterator iter) {
268 key_type key = iter.key();
269 int size = tree_.size();
270 int count = tree_.count(key);
271 typename CheckerType::iterator checker_iter = checker_.find(key);
272 for (iterator tmp(tree_.find(key)); tmp != iter; ++tmp) {
273 ++checker_iter;
274 }
275 typename CheckerType::iterator checker_next = checker_iter;
276 ++checker_next;
277 checker_.erase(checker_iter);
278 iter = tree_.erase(iter);
279 EXPECT_EQ(tree_.size(), checker_.size());
280 EXPECT_EQ(tree_.size(), size - 1);
281 EXPECT_EQ(tree_.count(key), count - 1);
282 if (count == 1) {
283 erase_check(key);
284 }
285 return iter_check(iter, checker_next);
286 }
287
288 void erase(iterator begin, iterator end) {
289 int size = tree_.size();
290 int count = distance(begin, end);
291 typename CheckerType::iterator checker_begin = checker_.find(begin.key());
292 for (iterator tmp(tree_.find(begin.key())); tmp != begin; ++tmp) {
293 ++checker_begin;
294 }
295 typename CheckerType::iterator checker_end =
296 end == tree_.end() ? checker_.end() : checker_.find(end.key());
297 if (end != tree_.end()) {
298 for (iterator tmp(tree_.find(end.key())); tmp != end; ++tmp) {
299 ++checker_end;
300 }
301 }
302 checker_.erase(checker_begin, checker_end);
303 tree_.erase(begin, end);
304 EXPECT_EQ(tree_.size(), checker_.size());
305 EXPECT_EQ(tree_.size(), size - count);
306 }
307
308 // Utility routines.
309 void clear() {
310 tree_.clear();
311 checker_.clear();
312 }
313 void swap(self_type &x) {
314 tree_.swap(x.tree_);
315 checker_.swap(x.checker_);
316 }
317
318 void verify() const {
319 tree_.verify();
320 EXPECT_EQ(tree_.size(), checker_.size());
321
322 // Move through the forward iterators using increment.
323 typename CheckerType::const_iterator
324 checker_iter(checker_.begin());
325 const_iterator tree_iter(tree_.begin());
326 for (; tree_iter != tree_.end();
327 ++tree_iter, ++checker_iter) {
328 EXPECT_EQ(*tree_iter, *checker_iter);
329 }
330
331 // Move through the forward iterators using decrement.
332 for (int n = tree_.size() - 1; n >= 0; --n) {
333 iter_check(tree_iter, checker_iter);
334 --tree_iter;
335 --checker_iter;
336 }
337 EXPECT_TRUE(tree_iter == tree_.begin());
338 EXPECT_TRUE(checker_iter == checker_.begin());
339
340 // Move through the reverse iterators using increment.
341 typename CheckerType::const_reverse_iterator
342 checker_riter(checker_.rbegin());
343 const_reverse_iterator tree_riter(tree_.rbegin());
344 for (; tree_riter != tree_.rend();
345 ++tree_riter, ++checker_riter) {
346 EXPECT_EQ(*tree_riter, *checker_riter);
347 }
348
349 // Move through the reverse iterators using decrement.
350 for (int n = tree_.size() - 1; n >= 0; --n) {
351 riter_check(tree_riter, checker_riter);
352 --tree_riter;
353 --checker_riter;
354 }
355 EXPECT_EQ(tree_riter, tree_.rbegin());
356 EXPECT_EQ(checker_riter, checker_.rbegin());
357 }
358
359 // Access to the underlying btree.
360 const TreeType& tree() const { return tree_; }
361
362 // Size routines.
363 size_type size() const {
364 EXPECT_EQ(tree_.size(), checker_.size());
365 return tree_.size();
366 }
367 size_type max_size() const { return tree_.max_size(); }
368 bool empty() const {
369 EXPECT_EQ(tree_.empty(), checker_.empty());
370 return tree_.empty();
371 }
372 size_type height() const { return tree_.height(); }
373 size_type internal_nodes() const { return tree_.internal_nodes(); }
374 size_type leaf_nodes() const { return tree_.leaf_nodes(); }
375 size_type nodes() const { return tree_.nodes(); }
376 size_type bytes_used() const { return tree_.bytes_used(); }
377 double fullness() const { return tree_.fullness(); }
378 double overhead() const { return tree_.overhead(); }
379
380 protected:
381 TreeType tree_;
382 const TreeType &const_tree_;
383 CheckerType checker_;
384};
385
386// A checker for unique sorted associative containers. TreeType is expected to
387// be btree_{set,map} and CheckerType is expected to be {set,map}.
388template <typename TreeType, typename CheckerType>
389class unique_checker : public base_checker<TreeType, CheckerType> {
390 typedef base_checker<TreeType, CheckerType> super_type;
391 typedef unique_checker<TreeType, CheckerType> self_type;
392
393 public:
394 typedef typename super_type::iterator iterator;
395 typedef typename super_type::value_type value_type;
396
397 public:
398 // Default constructor.
399 unique_checker()
400 : super_type() {
401 }
402 // Copy constructor.
403 unique_checker(const self_type &x)
404 : super_type(x) {
405 }
406 // Range constructor.
407 template <class InputIterator>
408 unique_checker(InputIterator b, InputIterator e)
409 : super_type(b, e) {
410 }
411
412 // Insertion routines.
413 std::pair<iterator,bool> insert(const value_type &x) {
414 int size = this->tree_.size();
415 std::pair<typename CheckerType::iterator,bool> checker_res =
416 this->checker_.insert(x);
417 std::pair<iterator,bool> tree_res = this->tree_.insert(x);
418 EXPECT_EQ(*tree_res.first, *checker_res.first);
419 EXPECT_EQ(tree_res.second, checker_res.second);
420 EXPECT_EQ(this->tree_.size(), this->checker_.size());
421 EXPECT_EQ(this->tree_.size(), size + tree_res.second);
422 return tree_res;
423 }
424 iterator insert(iterator position, const value_type &x) {
425 int size = this->tree_.size();
426 std::pair<typename CheckerType::iterator,bool> checker_res =
427 this->checker_.insert(x);
428 iterator tree_res = this->tree_.insert(position, x);
429 EXPECT_EQ(*tree_res, *checker_res.first);
430 EXPECT_EQ(this->tree_.size(), this->checker_.size());
431 EXPECT_EQ(this->tree_.size(), size + checker_res.second);
432 return tree_res;
433 }
434 template <typename InputIterator>
435 void insert(InputIterator b, InputIterator e) {
436 for (; b != e; ++b) {
437 insert(*b);
438 }
439 }
440};
441
442// A checker for multiple sorted associative containers. TreeType is expected
443// to be btree_{multiset,multimap} and CheckerType is expected to be
444// {multiset,multimap}.
445template <typename TreeType, typename CheckerType>
446class multi_checker : public base_checker<TreeType, CheckerType> {
447 typedef base_checker<TreeType, CheckerType> super_type;
448 typedef multi_checker<TreeType, CheckerType> self_type;
449
450 public:
451 typedef typename super_type::iterator iterator;
452 typedef typename super_type::value_type value_type;
453
454 public:
455 // Default constructor.
456 multi_checker()
457 : super_type() {
458 }
459 // Copy constructor.
460 multi_checker(const self_type &x)
461 : super_type(x) {
462 }
463 // Range constructor.
464 template <class InputIterator>
465 multi_checker(InputIterator b, InputIterator e)
466 : super_type(b, e) {
467 }
468
469 // Insertion routines.
470 iterator insert(const value_type &x) {
471 int size = this->tree_.size();
472 typename CheckerType::iterator checker_res = this->checker_.insert(x);
473 iterator tree_res = this->tree_.insert(x);
474 EXPECT_EQ(*tree_res, *checker_res);
475 EXPECT_EQ(this->tree_.size(), this->checker_.size());
476 EXPECT_EQ(this->tree_.size(), size + 1);
477 return tree_res;
478 }
479 iterator insert(iterator position, const value_type &x) {
480 int size = this->tree_.size();
481 typename CheckerType::iterator checker_res = this->checker_.insert(x);
482 iterator tree_res = this->tree_.insert(position, x);
483 EXPECT_EQ(*tree_res, *checker_res);
484 EXPECT_EQ(this->tree_.size(), this->checker_.size());
485 EXPECT_EQ(this->tree_.size(), size + 1);
486 return tree_res;
487 }
488 template <typename InputIterator>
489 void insert(InputIterator b, InputIterator e) {
490 for (; b != e; ++b) {
491 insert(*b);
492 }
493 }
494};
495
496char* GenerateDigits(char buf[16], int val, int maxval) {
497 EXPECT_LE(val, maxval);
498 int p = 15;
499 buf[p--] = 0;
500 while (maxval > 0) {
501 buf[p--] = '0' + (val % 10);
502 val /= 10;
503 maxval /= 10;
504 }
505 return buf + p + 1;
506}
507
508template <typename K>
509struct Generator {
510 int maxval;
511 Generator(int m)
512 : maxval(m) {
513 }
514 K operator()(int i) const {
515 EXPECT_LE(i, maxval);
516 return i;
517 }
518};
519
520template <>
521struct Generator<std::string> {
522 int maxval;
523 Generator(int m)
524 : maxval(m) {
525 }
526 std::string operator()(int i) const {
527 char buf[16];
528 return GenerateDigits(buf, i, maxval);
529 }
530};
531
532template <typename T, typename U>
533struct Generator<std::pair<T, U> > {
534 Generator<typename std::remove_const<T>::type> tgen;
535 Generator<typename std::remove_const<U>::type> ugen;
536
537 Generator(int m)
538 : tgen(m),
539 ugen(m) {
540 }
541 std::pair<T, U> operator()(int i) const {
542 return std::make_pair(tgen(i), ugen(i));
543 }
544};
545
// Generate values for our tests and benchmarks. Value range is [0, maxval].
//
// Returns a reference to a function-local cache of distinct random numbers
// that is grown, if necessary, until it holds at least n entries. The cache
// persists across calls, so the result may hold more than n entries, and
// entries added by an earlier call were drawn with that call's maxval.
//
// Preconditions (not checked): maxval >= 0, and enough unused values remain
// in [0, maxval] to reach n entries; otherwise the loop cannot terminate.
//
// Declared inline because this header defines the function; without it,
// including the header from more than one translation unit produces
// multiple definitions at link time.
inline const std::vector<int>& GenerateNumbers(int n, int maxval) {
  static std::vector<int> values;
  static std::set<int> unique_values;

  // n <= 0 never requires growth; testing it first keeps the size
  // comparison free of signed/unsigned conversion.
  while (n > 0 && values.size() < static_cast<size_t>(n)) {
    const int value = rand() % (maxval + 1);
    // insert() reports whether the value was new, which avoids a separate
    // find() on the same set.
    if (unique_values.insert(value).second) {
      values.push_back(value);
    }
  }

  return values;
}
566
567// Generates values in the range
568// [0, 4 * min(FLAGS_benchmark_values, FLAGS_test_values)]
569template <typename V>
570std::vector<V> GenerateValues(int n) {
571 int two_times_max = 2 * std::max(FLAGS_benchmark_values, FLAGS_test_values);
572 int four_times_max = 2 * two_times_max;
573 EXPECT_LE(n, two_times_max);
574 const std::vector<int> &nums = GenerateNumbers(n, four_times_max);
575 Generator<V> gen(four_times_max);
576 std::vector<V> vec;
577
578 for (int i = 0; i < n; i++) {
579 vec.push_back(gen(nums[i]));
580 }
581
582 return vec;
583}
584
585template <typename T, typename V>
586void DoTest(const char *name, T *b, const std::vector<V> &values) {
587 typename KeyOfValue<typename T::key_type, V>::type key_of_value;
588
589 T &mutable_b = *b;
590 const T &const_b = *b;
591
592 // Test insert.
593 for (int i = 0; i < values.size(); ++i) {
594 mutable_b.insert(values[i]);
595 mutable_b.value_check(values[i]);
596 }
597 assert(mutable_b.size() == values.size());
598
599 const_b.verify();
600 printf(" %s fullness=%0.2f overhead=%0.2f bytes-per-value=%0.2f\n",
601 name, const_b.fullness(), const_b.overhead(),
602 double(const_b.bytes_used()) / const_b.size());
603
604 // Test copy constructor.
605 T b_copy(const_b);
606 EXPECT_EQ(b_copy.size(), const_b.size());
607 EXPECT_LE(b_copy.height(), const_b.height());
608 EXPECT_LE(b_copy.internal_nodes(), const_b.internal_nodes());
609 EXPECT_LE(b_copy.leaf_nodes(), const_b.leaf_nodes());
610 for (int i = 0; i < values.size(); ++i) {
611 EXPECT_EQ(*b_copy.find(key_of_value(values[i])), values[i]);
612 }
613
614 // Test range constructor.
615 T b_range(const_b.begin(), const_b.end());
616 EXPECT_EQ(b_range.size(), const_b.size());
617 EXPECT_LE(b_range.height(), const_b.height());
618 EXPECT_LE(b_range.internal_nodes(), const_b.internal_nodes());
619 EXPECT_LE(b_range.leaf_nodes(), const_b.leaf_nodes());
620 for (int i = 0; i < values.size(); ++i) {
621 EXPECT_EQ(*b_range.find(key_of_value(values[i])), values[i]);
622 }
623
624 // Test range insertion for values that already exist.
625 b_range.insert(b_copy.begin(), b_copy.end());
626 b_range.verify();
627
628 // Test range insertion for new values.
629 b_range.clear();
630 b_range.insert(b_copy.begin(), b_copy.end());
631 EXPECT_EQ(b_range.size(), b_copy.size());
632 EXPECT_EQ(b_range.height(), b_copy.height());
633 EXPECT_EQ(b_range.internal_nodes(), b_copy.internal_nodes());
634 EXPECT_EQ(b_range.leaf_nodes(), b_copy.leaf_nodes());
635 for (int i = 0; i < values.size(); ++i) {
636 EXPECT_EQ(*b_range.find(key_of_value(values[i])), values[i]);
637 }
638
639 // Test assignment to self. Nothing should change.
640 b_range.operator=(b_range);
641 EXPECT_EQ(b_range.size(), b_copy.size());
642 EXPECT_EQ(b_range.height(), b_copy.height());
643 EXPECT_EQ(b_range.internal_nodes(), b_copy.internal_nodes());
644 EXPECT_EQ(b_range.leaf_nodes(), b_copy.leaf_nodes());
645
646 // Test assignment of new values.
647 b_range.clear();
648 b_range = b_copy;
649 EXPECT_EQ(b_range.size(), b_copy.size());
650 EXPECT_EQ(b_range.height(), b_copy.height());
651 EXPECT_EQ(b_range.internal_nodes(), b_copy.internal_nodes());
652 EXPECT_EQ(b_range.leaf_nodes(), b_copy.leaf_nodes());
653
654 // Test swap.
655 b_range.clear();
656 b_range.swap(b_copy);
657 EXPECT_EQ(b_copy.size(), 0);
658 EXPECT_EQ(b_range.size(), const_b.size());
659 for (int i = 0; i < values.size(); ++i) {
660 EXPECT_EQ(*b_range.find(key_of_value(values[i])), values[i]);
661 }
662 b_range.swap(b_copy);
663
664 // Test erase via values.
665 for (int i = 0; i < values.size(); ++i) {
666 mutable_b.erase(key_of_value(values[i]));
667 // Erasing a non-existent key should have no effect.
668 EXPECT_EQ(mutable_b.erase(key_of_value(values[i])), 0);
669 }
670
671 const_b.verify();
672 EXPECT_EQ(const_b.internal_nodes(), 0);
673 EXPECT_EQ(const_b.leaf_nodes(), 0);
674 EXPECT_EQ(const_b.size(), 0);
675
676 // Test erase via iterators.
677 mutable_b = b_copy;
678 for (int i = 0; i < values.size(); ++i) {
679 mutable_b.erase(mutable_b.find(key_of_value(values[i])));
680 }
681
682 const_b.verify();
683 EXPECT_EQ(const_b.internal_nodes(), 0);
684 EXPECT_EQ(const_b.leaf_nodes(), 0);
685 EXPECT_EQ(const_b.size(), 0);
686
687 // Test insert with hint.
688 for (int i = 0; i < values.size(); i++) {
689 mutable_b.insert(mutable_b.upper_bound(key_of_value(values[i])), values[i]);
690 }
691
692 const_b.verify();
693
694 // Test dumping of the btree to an ostream. There should be 1 line for each
695 // value.
696 std::stringstream strm;
697 strm << mutable_b.tree();
698 EXPECT_EQ(mutable_b.size(), strcount(strm.str(), '\n'));
699
700 // Test range erase.
701 mutable_b.erase(mutable_b.begin(), mutable_b.end());
702 EXPECT_EQ(mutable_b.size(), 0);
703 const_b.verify();
704
705 // First half.
706 mutable_b = b_copy;
707 typename T::iterator mutable_iter_end = mutable_b.begin();
708 for (int i = 0; i < values.size() / 2; ++i) ++mutable_iter_end;
709 mutable_b.erase(mutable_b.begin(), mutable_iter_end);
710 EXPECT_EQ(mutable_b.size(), values.size() - values.size() / 2);
711 const_b.verify();
712
713 // Second half.
714 mutable_b = b_copy;
715 typename T::iterator mutable_iter_begin = mutable_b.begin();
716 for (int i = 0; i < values.size() / 2; ++i) ++mutable_iter_begin;
717 mutable_b.erase(mutable_iter_begin, mutable_b.end());
718 EXPECT_EQ(mutable_b.size(), values.size() / 2);
719 const_b.verify();
720
721 // Second quarter.
722 mutable_b = b_copy;
723 mutable_iter_begin = mutable_b.begin();
724 for (int i = 0; i < values.size() / 4; ++i) ++mutable_iter_begin;
725 mutable_iter_end = mutable_iter_begin;
726 for (int i = 0; i < values.size() / 4; ++i) ++mutable_iter_end;
727 mutable_b.erase(mutable_iter_begin, mutable_iter_end);
728 EXPECT_EQ(mutable_b.size(), values.size() - values.size() / 4);
729 const_b.verify();
730
731 mutable_b.clear();
732}
733
734template <typename T>
735void ConstTest() {
736 typedef typename T::value_type value_type;
737 typename KeyOfValue<typename T::key_type, value_type>::type key_of_value;
738
739 T mutable_b;
740 const T &const_b = mutable_b;
741
742 // Insert a single value into the container and test looking it up.
743 value_type value = Generator<value_type>(2)(2);
744 mutable_b.insert(value);
745 EXPECT_TRUE(mutable_b.find(key_of_value(value)) != const_b.end());
746 EXPECT_TRUE(const_b.find(key_of_value(value)) != mutable_b.end());
747 EXPECT_EQ(*const_b.lower_bound(key_of_value(value)), value);
748 EXPECT_TRUE(const_b.upper_bound(key_of_value(value)) == const_b.end());
749 EXPECT_EQ(*const_b.equal_range(key_of_value(value)).first, value);
750
751 // We can only create a non-const iterator from a non-const container.
752 typename T::iterator mutable_iter(mutable_b.begin());
753 EXPECT_TRUE(mutable_iter == const_b.begin());
754 EXPECT_TRUE(mutable_iter != const_b.end());
755 EXPECT_TRUE(const_b.begin() == mutable_iter);
756 EXPECT_TRUE(const_b.end() != mutable_iter);
757 typename T::reverse_iterator mutable_riter(mutable_b.rbegin());
758 EXPECT_TRUE(mutable_riter == const_b.rbegin());
759 EXPECT_TRUE(mutable_riter != const_b.rend());
760 EXPECT_TRUE(const_b.rbegin() == mutable_riter);
761 EXPECT_TRUE(const_b.rend() != mutable_riter);
762
763 // We can create a const iterator from a non-const iterator.
764 typename T::const_iterator const_iter(mutable_iter);
765 EXPECT_TRUE(const_iter == mutable_b.begin());
766 EXPECT_TRUE(const_iter != mutable_b.end());
767 EXPECT_TRUE(mutable_b.begin() == const_iter);
768 EXPECT_TRUE(mutable_b.end() != const_iter);
769 typename T::const_reverse_iterator const_riter(mutable_riter);
770 EXPECT_EQ(const_riter, mutable_b.rbegin());
771 EXPECT_TRUE(const_riter != mutable_b.rend());
772 EXPECT_EQ(mutable_b.rbegin(), const_riter);
773 EXPECT_TRUE(mutable_b.rend() != const_riter);
774
775 // Make sure various methods can be invoked on a const container.
776 const_b.verify();
777 EXPECT_FALSE(const_b.empty());
778 EXPECT_EQ(const_b.size(), 1);
779 EXPECT_GT(const_b.max_size(), 0);
780 EXPECT_EQ(const_b.height(), 1);
781 EXPECT_EQ(const_b.count(key_of_value(value)), 1);
782 EXPECT_EQ(const_b.internal_nodes(), 0);
783 EXPECT_EQ(const_b.leaf_nodes(), 1);
784 EXPECT_EQ(const_b.nodes(), 1);
785 EXPECT_GT(const_b.bytes_used(), 0);
786 EXPECT_GT(const_b.fullness(), 0);
787 EXPECT_GT(const_b.overhead(), 0);
788}
789
790template <typename T, typename C>
791void BtreeTest() {
792 ConstTest<T>();
793
794 typedef typename std::remove_const<typename T::value_type>::type V;
795 std::vector<V> random_values = GenerateValues<V>(FLAGS_test_values);
796
797 unique_checker<T, C> container;
798
799 // Test key insertion/deletion in sorted order.
800 std::vector<V> sorted_values(random_values);
801 sort(sorted_values.begin(), sorted_values.end());
802 DoTest("sorted: ", &container, sorted_values);
803
804 // Test key insertion/deletion in reverse sorted order.
805 reverse(sorted_values.begin(), sorted_values.end());
806 DoTest("rsorted: ", &container, sorted_values);
807
808 // Test key insertion/deletion in random order.
809 DoTest("random: ", &container, random_values);
810}
811
812template <typename T, typename C>
813void BtreeMultiTest() {
814 ConstTest<T>();
815
816 typedef typename std::remove_const<typename T::value_type>::type V;
817 const std::vector<V>& random_values = GenerateValues<V>(FLAGS_test_values);
818
819 multi_checker<T, C> container;
820
821 // Test keys in sorted order.
822 std::vector<V> sorted_values(random_values);
823 sort(sorted_values.begin(), sorted_values.end());
824 DoTest("sorted: ", &container, sorted_values);
825
826 // Test keys in reverse sorted order.
827 reverse(sorted_values.begin(), sorted_values.end());
828 DoTest("rsorted: ", &container, sorted_values);
829
830 // Test keys in random order.
831 DoTest("random: ", &container, random_values);
832
833 // Test keys in random order w/ duplicates.
834 std::vector<V> duplicate_values(random_values);
835 duplicate_values.insert(
836 duplicate_values.end(), random_values.begin(), random_values.end());
837 DoTest("duplicates:", &container, duplicate_values);
838
839 // Test all identical keys.
840 std::vector<V> identical_values(100);
841 fill(identical_values.begin(), identical_values.end(), Generator<V>(2)(2));
842 DoTest("identical: ", &container, identical_values);
843}
844
845template <typename T, typename Alloc = std::allocator<T> >
846class TestAllocator : public Alloc {
847 public:
848 typedef typename Alloc::pointer pointer;
849 typedef typename Alloc::size_type size_type;
850
851 TestAllocator() : bytes_used_(NULL) { }
852 TestAllocator(int64_t *bytes_used) : bytes_used_(bytes_used) { }
853
854 // Constructor used for rebinding
855 template <class U>
856 TestAllocator(const TestAllocator<U>& x)
857 : Alloc(x),
858 bytes_used_(x.bytes_used()) {
859 }
860
861 pointer allocate(size_type n, std::allocator<void>::const_pointer hint = 0) {
862 EXPECT_TRUE(bytes_used_ != NULL);
863 *bytes_used_ += n * sizeof(T);
864 return Alloc::allocate(n, hint);
865 }
866
867 void deallocate(pointer p, size_type n) {
868 Alloc::deallocate(p, n);
869 EXPECT_TRUE(bytes_used_ != NULL);
870 *bytes_used_ -= n * sizeof(T);
871 }
872
873 // Rebind allows an allocator<T> to be used for a different type
874 template <class U> struct rebind {
875 typedef TestAllocator<U, typename Alloc::template rebind<U>::other> other;
876 };
877
878 int64_t* bytes_used() const { return bytes_used_; }
879
880 private:
881 int64_t *bytes_used_;
882};
883
884template <typename T>
885void BtreeAllocatorTest() {
886 typedef typename T::value_type value_type;
887
888 int64_t alloc1 = 0;
889 int64_t alloc2 = 0;
890 T b1(typename T::key_compare(), &alloc1);
891 T b2(typename T::key_compare(), &alloc2);
892
893 // This should swap the allocators!
894 swap(b1, b2);
895
896 for (int i = 0; i < 1000; i++) {
897 b1.insert(Generator<value_type>(1000)(i));
898 }
899
900 // We should have allocated out of alloc2!
901 EXPECT_LE(b1.bytes_used(), alloc2 + sizeof(b1));
902 EXPECT_GT(alloc2, alloc1);
903}
904
905template <typename T>
906void BtreeMapTest() {
907 typedef typename T::value_type value_type;
908 typedef typename T::mapped_type mapped_type;
909
910 mapped_type m = Generator<mapped_type>(0)(0);
911 (void) m;
912
913 T b;
914
915 // Verify we can insert using operator[].
916 for (int i = 0; i < 1000; i++) {
917 value_type v = Generator<value_type>(1000)(i);
918 b[v.first] = v.second;
919 }
920 EXPECT_EQ(b.size(), 1000);
921
922 // Test whether we can use the "->" operator on iterators and
923 // reverse_iterators. This stresses the btree_map_params::pair_pointer
924 // mechanism.
925 EXPECT_EQ(b.begin()->first, Generator<value_type>(1000)(0).first);
926 EXPECT_EQ(b.begin()->second, Generator<value_type>(1000)(0).second);
927 EXPECT_EQ(b.rbegin()->first, Generator<value_type>(1000)(999).first);
928 EXPECT_EQ(b.rbegin()->second, Generator<value_type>(1000)(999).second);
929}
930
931template <typename T>
932void BtreeMultiMapTest() {
933 typedef typename T::mapped_type mapped_type;
934 mapped_type m = Generator<mapped_type>(0)(0);
935 (void) m;
936}
937
938} // namespace btree
939
940#endif // UTIL_BTREE_BTREE_TEST_H__
diff --git a/xdelta3/cpp-btree/btree_test_flags.cc b/xdelta3/cpp-btree/btree_test_flags.cc
new file mode 100644
index 0000000..bf608a9
--- /dev/null
+++ b/xdelta3/cpp-btree/btree_test_flags.cc
@@ -0,0 +1,20 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "gflags/gflags.h"
16
// Command-line flags for the btree tests and benchmarks, declared with the
// gflags DEFINE_int32 macro (flag name, default value, help text).
// Default for test_values is 10000.
17DEFINE_int32(test_values, 10000,
18 "The number of values to use for tests.");
// Default for benchmark_values is 1000000.
19DEFINE_int32(benchmark_values, 1000000,
20 "The number of values to use for benchmarks.");
diff --git a/xdelta3/cpp-btree/safe_btree.h b/xdelta3/cpp-btree/safe_btree.h
new file mode 100644
index 0000000..2d85c70
--- /dev/null
+++ b/xdelta3/cpp-btree/safe_btree.h
@@ -0,0 +1,395 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// A safe_btree<> wraps around a btree<> and removes the caveat that insertion
16// and deletion invalidate iterators. A safe_btree<> maintains a generation
17// number that is incremented on every mutation. A safe_btree<>::iterator keeps
18// a pointer to the safe_btree<> it came from, the generation of the tree when
19// it was last validated and the key the underlying btree<>::iterator points
20// to. If an iterator is accessed and its generation differs from the tree
21// generation it is revalidated.
22//
23// References and pointers returned by safe_btree iterators are not safe.
24//
25// See the incorrect usage examples mentioned in safe_btree_set.h and
26// safe_btree_map.h.
27
28#ifndef UTIL_BTREE_SAFE_BTREE_H__
29#define UTIL_BTREE_SAFE_BTREE_H__
30
31#include <stddef.h>
32#include <iosfwd>
33#include <utility>
34
35#include "btree.h"
36
37namespace btree {
38
// Iterator adaptor handed out by safe_btree<>.
//
// It stores the underlying btree iterator (iter_), the owning tree (tree_),
// the tree generation observed when the iterator was last validated
// (generation_) and a copy of the key it pointed at (key_). Whenever the
// underlying iterator is requested through iter()/mutable_iter() and the
// stored generation no longer matches the tree's, the position is
// re-established before use.
//
// Sign convention for generation_: a positive value means key_ holds a valid
// key; a negative value means the iterator refers to end(); 0 is only set by
// the default constructor.
//
// NOTE(review): a default-constructed iterator has tree_ == NULL, and
// mutable_iter()/update() dereference tree_ unconditionally, so such an
// iterator must be assigned before any other use.
39template <typename Tree, typename Iterator>
40class safe_btree_iterator {
41 public:
42 typedef typename Iterator::key_type key_type;
43 typedef typename Iterator::value_type value_type;
44 typedef typename Iterator::size_type size_type;
45 typedef typename Iterator::difference_type difference_type;
46 typedef typename Iterator::pointer pointer;
47 typedef typename Iterator::reference reference;
48 typedef typename Iterator::const_pointer const_pointer;
49 typedef typename Iterator::const_reference const_reference;
50 typedef typename Iterator::iterator_category iterator_category;
51 typedef typename Tree::iterator iterator;
52 typedef typename Tree::const_iterator const_iterator;
53 typedef safe_btree_iterator<Tree, Iterator> self_type;
54
// Re-snapshots the validation state from the current iter_: records the
// tree's generation and the pointed-to key, or the negated generation when
// iter_ equals the underlying tree's end(). Declared const because all the
// state it writes is mutable.
55 void update() const {
56 if (iter_ != tree_->internal_btree()->end()) {
57 // A positive generation indicates a valid key.
58 generation_ = tree_->generation();
59 key_ = iter_.key();
60 } else {
61 // Use a negative generation to indicate iter_ points to end().
62 generation_ = -tree_->generation();
63 }
64 }
65
66 public:
// Default constructor: generation 0, default key/iterator, no tree.
67 safe_btree_iterator()
68 : generation_(0),
69 key_(),
70 iter_(),
71 tree_(NULL) {
72 }
// Constructs from the tree's non-const iterator type by copying its
// generation, key, (revalidated) underlying iterator and tree pointer.
73 safe_btree_iterator(const iterator &x)
74 : generation_(x.generation()),
75 key_(x.key()),
76 iter_(x.iter()),
77 tree_(x.tree()) {
78 }
// Wraps a raw underlying iterator belonging to "tree"; update() fills in
// generation_ and key_ from the tree's current state.
79 safe_btree_iterator(Tree *tree, const Iterator &iter)
80 : generation_(),
81 key_(),
82 iter_(iter),
83 tree_(tree) {
84 update();
85 }
86
87 Tree* tree() const { return tree_; }
88 int64_t generation() const { return generation_; }
89
// Returns the underlying iterator, revalidating it first if the tree has
// been mutated since the last validation:
//  - positive generation_: re-find the position with lower_bound(key_) and
//    re-snapshot via update();
//  - otherwise (end() or default state): point at the tree's current end()
//    and store the negated current generation.
90 Iterator* mutable_iter() const {
91 if (generation_ != tree_->generation()) {
92 if (generation_ > 0) {
93 // This does the wrong thing for a multi{set,map}. If my iter was
94 // pointing to the 2nd of 2 values with the same key, then this will
95 // reset it to point to the first. This is why we don't provide a
96 // safe_btree_multi{set,map}.
97 iter_ = tree_->internal_btree()->lower_bound(key_);
98 update();
99 } else if (-generation_ != tree_->generation()) {
100 iter_ = tree_->internal_btree()->end();
101 generation_ = -tree_->generation();
102 }
103 }
104 return &iter_;
105 }
// Read-only access to the (revalidated) underlying iterator.
106 const Iterator& iter() const {
107 return *mutable_iter();
108 }
109
110 // Equality/inequality operators.
// Both sides are revalidated (through iter()) before being compared.
111 bool operator==(const const_iterator &x) const {
112 return iter() == x.iter();
113 }
114 bool operator!=(const const_iterator &x) const {
115 return iter() != x.iter();
116 }
117
118 // Accessors for the key/value the iterator is pointing at.
// Returns the cached key_ as-is; no revalidation is performed here.
119 const key_type& key() const {
120 return key_;
121 }
122 // This reference value is potentially invalidated by any non-const
123 // method on the tree; it is NOT safe.
124 reference operator*() const {
125 assert(generation_ > 0);
126 return iter().operator*();
127 }
128 // This pointer value is potentially invalidated by any non-const
129 // method on the tree; it is NOT safe.
130 pointer operator->() const {
131 assert(generation_ > 0);
132 return iter().operator->();
133 }
134
135 // Increment/decrement operators.
// Each step revalidates, moves the underlying iterator, then re-snapshots
// generation_/key_ for the new position.
136 self_type& operator++() {
137 ++(*mutable_iter());
138 update();
139 return *this;
140 }
141 self_type& operator--() {
142 --(*mutable_iter());
143 update();
144 return *this;
145 }
// Postfix forms return a copy taken before the step.
146 self_type operator++(int) {
147 self_type tmp = *this;
148 ++*this;
149 return tmp;
150 }
151 self_type operator--(int) {
152 self_type tmp = *this;
153 --*this;
154 return tmp;
155 }
156
157 private:
158 // The generation of the tree when "iter" was updated.
159 mutable int64_t generation_;
160 // The key the iterator points to.
161 mutable key_type key_;
162 // The underlying iterator.
163 mutable Iterator iter_;
164 // The tree the iterator is associated with.
165 Tree *tree_;
166};
167
// Wrapper around btree<Params> that hands out safe_btree_iterator objects.
//
// The wrapper keeps a generation counter (starting at 1) next to the wrapped
// tree. Mutating operations bump the counter; iterators compare their stored
// generation against it to decide whether they need to revalidate. All
// lookup, size and statistics routines forward to the wrapped tree.
168template <typename Params>
169class safe_btree {
170 typedef safe_btree<Params> self_type;
171
172 typedef btree<Params> btree_type;
173 typedef typename btree_type::iterator tree_iterator;
174 typedef typename btree_type::const_iterator tree_const_iterator;
175
176 public:
177 typedef typename btree_type::params_type params_type;
178 typedef typename btree_type::key_type key_type;
179 typedef typename btree_type::data_type data_type;
180 typedef typename btree_type::mapped_type mapped_type;
181 typedef typename btree_type::value_type value_type;
182 typedef typename btree_type::key_compare key_compare;
183 typedef typename btree_type::allocator_type allocator_type;
184 typedef typename btree_type::pointer pointer;
185 typedef typename btree_type::const_pointer const_pointer;
186 typedef typename btree_type::reference reference;
187 typedef typename btree_type::const_reference const_reference;
188 typedef typename btree_type::size_type size_type;
189 typedef typename btree_type::difference_type difference_type;
190 typedef safe_btree_iterator<self_type, tree_iterator> iterator;
191 typedef safe_btree_iterator<
192 const self_type, tree_const_iterator> const_iterator;
193 typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
194 typedef std::reverse_iterator<iterator> reverse_iterator;
195
196 public:
197 // Constructor taking the comparator and allocator (both required here).
198 safe_btree(const key_compare &comp, const allocator_type &alloc)
199 : tree_(comp, alloc),
200 generation_(1) {
201 }
202
203 // Copy constructor.
// Copies the wrapped tree; the copy's generation restarts at 1 rather than
// inheriting x's.
204 safe_btree(const self_type &x)
205 : tree_(x.tree_),
206 generation_(1) {
207 }
208
// Iterator factories: each wraps the corresponding underlying iterator
// together with a pointer to this tree.
209 iterator begin() {
210 return iterator(this, tree_.begin());
211 }
212 const_iterator begin() const {
213 return const_iterator(this, tree_.begin());
214 }
215 iterator end() {
216 return iterator(this, tree_.end());
217 }
218 const_iterator end() const {
219 return const_iterator(this, tree_.end());
220 }
221 reverse_iterator rbegin() {
222 return reverse_iterator(end());
223 }
224 const_reverse_iterator rbegin() const {
225 return const_reverse_iterator(end());
226 }
227 reverse_iterator rend() {
228 return reverse_iterator(begin());
229 }
230 const_reverse_iterator rend() const {
231 return const_reverse_iterator(begin());
232 }
233
234 // Lookup routines.
// These forward to the wrapped tree and do not change the generation.
235 iterator lower_bound(const key_type &key) {
236 return iterator(this, tree_.lower_bound(key));
237 }
238 const_iterator lower_bound(const key_type &key) const {
239 return const_iterator(this, tree_.lower_bound(key));
240 }
241 iterator upper_bound(const key_type &key) {
242 return iterator(this, tree_.upper_bound(key));
243 }
244 const_iterator upper_bound(const key_type &key) const {
245 return const_iterator(this, tree_.upper_bound(key));
246 }
247 std::pair<iterator, iterator> equal_range(const key_type &key) {
248 std::pair<tree_iterator, tree_iterator> p = tree_.equal_range(key);
249 return std::make_pair(iterator(this, p.first),
250 iterator(this, p.second));
251 }
252 std::pair<const_iterator, const_iterator> equal_range(const key_type &key) const {
253 std::pair<tree_const_iterator, tree_const_iterator> p = tree_.equal_range(key);
254 return std::make_pair(const_iterator(this, p.first),
255 const_iterator(this, p.second));
256 }
257 iterator find_unique(const key_type &key) {
258 return iterator(this, tree_.find_unique(key));
259 }
260 const_iterator find_unique(const key_type &key) const {
261 return const_iterator(this, tree_.find_unique(key));
262 }
263 iterator find_multi(const key_type &key) {
264 return iterator(this, tree_.find_multi(key));
265 }
266 const_iterator find_multi(const key_type &key) const {
267 return const_iterator(this, tree_.find_multi(key));
268 }
269 size_type count_unique(const key_type &key) const {
270 return tree_.count_unique(key);
271 }
272 size_type count_multi(const key_type &key) const {
273 return tree_.count_multi(key);
274 }
275
276 // Insertion routines.
// For the pair-returning overloads the generation is bumped only when the
// insertion actually happened (p.second is added to generation_). The
// returned iterator is built after the bump, so it carries the new
// generation.
277 template <typename ValuePointer>
278 std::pair<iterator, bool> insert_unique(const key_type &key, ValuePointer value) {
279 std::pair<tree_iterator, bool> p = tree_.insert_unique(key, value);
280 generation_ += p.second;
281 return std::make_pair(iterator(this, p.first), p.second);
282 }
283 std::pair<iterator, bool> insert_unique(const value_type &v) {
284 std::pair<tree_iterator, bool> p = tree_.insert_unique(v);
285 generation_ += p.second;
286 return std::make_pair(iterator(this, p.first), p.second);
287 }
// Hinted insert: the hint is revalidated (position.iter()) before the
// generation is bumped; the generation is bumped unconditionally.
288 iterator insert_unique(iterator position, const value_type &v) {
289 tree_iterator tree_pos = position.iter();
290 ++generation_;
291 return iterator(this, tree_.insert_unique(tree_pos, v));
292 }
// Range insert: inserts each element through insert_unique(const value_type&).
293 template <typename InputIterator>
294 void insert_unique(InputIterator b, InputIterator e) {
295 for (; b != e; ++b) {
296 insert_unique(*b);
297 }
298 }
299 iterator insert_multi(const value_type &v) {
300 ++generation_;
301 return iterator(this, tree_.insert_multi(v));
302 }
303 iterator insert_multi(iterator position, const value_type &v) {
304 tree_iterator tree_pos = position.iter();
305 ++generation_;
306 return iterator(this, tree_.insert_multi(tree_pos, v));
307 }
308 template <typename InputIterator>
309 void insert_multi(InputIterator b, InputIterator e) {
310 for (; b != e; ++b) {
311 insert_multi(*b);
312 }
313 }
// Assignment bumps this tree's generation and copies x's wrapped tree;
// x's generation value is not copied.
314 self_type& operator=(const self_type &x) {
315 if (&x == this) {
316 // Don't copy onto ourselves.
317 return *this;
318 }
319 ++generation_;
320 tree_ = x.tree_;
321 return *this;
322 }
323
324 // Deletion routines.
// Both bounds are revalidated through iter() before the underlying erase.
325 void erase(const iterator &begin, const iterator &end) {
326 tree_.erase(begin.iter(), end.iter());
327 ++generation_;
328 }
329 // Erase the specified iterator from the btree. The iterator must be valid
330 // (i.e. not equal to end()). Return an iterator pointing to the node after
331 // the one that was erased (or end() if none exists).
332 iterator erase(iterator iter) {
333 tree_iterator res = tree_.erase(iter.iter());
334 ++generation_;
335 return iterator(this, res);
336 }
// Key-based erase: the value returned by the wrapped tree is added to the
// generation, so erasing nothing (res == 0) leaves the generation unchanged.
337 int erase_unique(const key_type &key) {
338 int res = tree_.erase_unique(key);
339 generation_ += res;
340 return res;
341 }
342 int erase_multi(const key_type &key) {
343 int res = tree_.erase_multi(key);
344 generation_ += res;
345 return res;
346 }
347
348 // Access to the underlying btree.
349 btree_type* internal_btree() { return &tree_; }
350 const btree_type* internal_btree() const { return &tree_; }
351
352 // Utility routines.
353 void clear() {
354 ++generation_;
355 tree_.clear();
356 }
// Swaps the wrapped trees and bumps the generation of both wrappers; the
// generation counters themselves are not exchanged.
357 void swap(self_type &x) {
358 ++generation_;
359 ++x.generation_;
360 tree_.swap(x.tree_);
361 }
362 void dump(std::ostream &os) const {
363 tree_.dump(os);
364 }
365 void verify() const {
366 tree_.verify();
367 }
// Current generation; compared by iterators against their stored value.
368 int64_t generation() const {
369 return generation_;
370 }
371 key_compare key_comp() const { return tree_.key_comp(); }
372
373 // Size routines.
374 size_type size() const { return tree_.size(); }
375 size_type max_size() const { return tree_.max_size(); }
376 bool empty() const { return tree_.empty(); }
377 size_type height() const { return tree_.height(); }
378 size_type internal_nodes() const { return tree_.internal_nodes(); }
379 size_type leaf_nodes() const { return tree_.leaf_nodes(); }
380 size_type nodes() const { return tree_.nodes(); }
381 size_type bytes_used() const { return tree_.bytes_used(); }
382 static double average_bytes_per_value() {
383 return btree_type::average_bytes_per_value();
384 }
385 double fullness() const { return tree_.fullness(); }
386 double overhead() const { return tree_.overhead(); }
387
388 private:
// The wrapped tree and the mutation counter (initialized to 1).
389 btree_type tree_;
390 int64_t generation_;
391};
392
393} // namespace btree
394
395#endif // UTIL_BTREE_SAFE_BTREE_H__
diff --git a/xdelta3/cpp-btree/safe_btree_map.h b/xdelta3/cpp-btree/safe_btree_map.h
new file mode 100644
index 0000000..a0668f1
--- /dev/null
+++ b/xdelta3/cpp-btree/safe_btree_map.h
@@ -0,0 +1,89 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// The safe_btree_map<> is like btree_map<> except that it removes the caveat
16// about insertion and deletion invalidating existing iterators at a small cost
17// in making iterators larger and slower.
18//
19// Revalidation occurs whenever an iterator is accessed. References
20// and pointers returned by safe_btree_map<> iterators are not stable,
21// they are potentially invalidated by any non-const method on the map.
22//
23// BEGIN INCORRECT EXAMPLE
24// for (auto i = safe_map->begin(); i != safe_map->end(); ++i) {
25// const T *value = &i->second; // DO NOT DO THIS
26// [code that modifies safe_map and uses value];
27// }
28// END INCORRECT EXAMPLE
29#ifndef UTIL_BTREE_SAFE_BTREE_MAP_H__
30#define UTIL_BTREE_SAFE_BTREE_MAP_H__
31
32#include <functional>
33#include <memory>
34#include <utility>
35
36#include "btree_container.h"
37#include "btree_map.h"
38#include "safe_btree.h"
39
40namespace btree {
41
42// The safe_btree_map class is needed mainly for its constructors.
43template <typename Key, typename Value,
44 typename Compare = std::less<Key>,
45 typename Alloc = std::allocator<std::pair<const Key, Value> >,
46 int TargetNodeSize = 256>
47class safe_btree_map : public btree_map_container<
48 safe_btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
49
50 typedef safe_btree_map<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
51 typedef btree_map_params<
52 Key, Value, Compare, Alloc, TargetNodeSize> params_type;
53 typedef safe_btree<params_type> btree_type;
54 typedef btree_map_container<btree_type> super_type;
55
56 public:
57 typedef typename btree_type::key_compare key_compare;
58 typedef typename btree_type::allocator_type allocator_type;
59
60 public:
61 // Default constructor.
62 safe_btree_map(const key_compare &comp = key_compare(),
63 const allocator_type &alloc = allocator_type())
64 : super_type(comp, alloc) {
65 }
66
67 // Copy constructor.
68 safe_btree_map(const self_type &x)
69 : super_type(x) {
70 }
71
72 // Range constructor.
73 template <class InputIterator>
74 safe_btree_map(InputIterator b, InputIterator e,
75 const key_compare &comp = key_compare(),
76 const allocator_type &alloc = allocator_type())
77 : super_type(b, e, comp, alloc) {
78 }
79};
80
81template <typename K, typename V, typename C, typename A, int N>
82inline void swap(safe_btree_map<K, V, C, A, N> &x,
83 safe_btree_map<K, V, C, A, N> &y) {
84 x.swap(y);
85}
86
87} // namespace btree
88
89#endif // UTIL_BTREE_SAFE_BTREE_MAP_H__
diff --git a/xdelta3/cpp-btree/safe_btree_set.h b/xdelta3/cpp-btree/safe_btree_set.h
new file mode 100644
index 0000000..a6cd541
--- /dev/null
+++ b/xdelta3/cpp-btree/safe_btree_set.h
@@ -0,0 +1,88 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// The safe_btree_set<> is like btree_set<> except that it removes the caveat
16// about insertion and deletion invalidating existing iterators at a small cost
17// in making iterators larger and slower.
18//
19// Revalidation occurs whenever an iterator is accessed. References
20// and pointers returned by safe_btree_set<> iterators are not stable,
21// they are potentially invalidated by any non-const method on the set.
22//
23// BEGIN INCORRECT EXAMPLE
24// for (auto i = safe_set->begin(); i != safe_set->end(); ++i) {
25// const T &value = *i; // DO NOT DO THIS
26// [code that modifies safe_set and uses value];
27// }
28// END INCORRECT EXAMPLE
29
30#ifndef UTIL_BTREE_SAFE_BTREE_SET_H__
31#define UTIL_BTREE_SAFE_BTREE_SET_H__
32
33#include <functional>
34#include <memory>
35
36#include "btree_container.h"
37#include "btree_set.h"
38#include "safe_btree.h"
39
40namespace btree {
41
42// The safe_btree_set class is needed mainly for its constructors.
43template <typename Key,
44 typename Compare = std::less<Key>,
45 typename Alloc = std::allocator<Key>,
46 int TargetNodeSize = 256>
47class safe_btree_set : public btree_unique_container<
48 safe_btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > > {
49
50 typedef safe_btree_set<Key, Compare, Alloc, TargetNodeSize> self_type;
51 typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
52 typedef safe_btree<params_type> btree_type;
53 typedef btree_unique_container<btree_type> super_type;
54
55 public:
56 typedef typename btree_type::key_compare key_compare;
57 typedef typename btree_type::allocator_type allocator_type;
58
59 public:
60 // Default constructor.
61 safe_btree_set(const key_compare &comp = key_compare(),
62 const allocator_type &alloc = allocator_type())
63 : super_type(comp, alloc) {
64 }
65
66 // Copy constructor.
67 safe_btree_set(const self_type &x)
68 : super_type(x) {
69 }
70
71 // Range constructor.
72 template <class InputIterator>
73 safe_btree_set(InputIterator b, InputIterator e,
74 const key_compare &comp = key_compare(),
75 const allocator_type &alloc = allocator_type())
76 : super_type(b, e, comp, alloc) {
77 }
78};
79
80template <typename K, typename C, typename A, int N>
81inline void swap(safe_btree_set<K, C, A, N> &x,
82 safe_btree_set<K, C, A, N> &y) {
83 x.swap(y);
84}
85
86} // namespace btree
87
88#endif // UTIL_BTREE_SAFE_BTREE_SET_H__
diff --git a/xdelta3/cpp-btree/safe_btree_test.cc b/xdelta3/cpp-btree/safe_btree_test.cc
new file mode 100644
index 0000000..0d77ae0
--- /dev/null
+++ b/xdelta3/cpp-btree/safe_btree_test.cc
@@ -0,0 +1,116 @@
1// Copyright 2013 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// TODO(pmattis): Add some tests that iterators are not invalidated by
16// insertion and deletion.
17
18#include <functional>
19#include <map>
20#include <set>
21#include <string>
22#include <utility>
23
24#include "gtest/gtest.h"
25#include "btree_test.h"
26#include "safe_btree_map.h"
27#include "safe_btree_set.h"
28
29class UnsafeArena;
30
31namespace btree {
32namespace {
33
34template <typename K, int N>
35void SetTest() {
36 typedef TestAllocator<K> TestAlloc;
37 BtreeTest<safe_btree_set<K, std::less<K>, std::allocator<K>, N>, std::set<K> >();
38 BtreeAllocatorTest<safe_btree_set<K, std::less<K>, TestAlloc, N> >();
39}
40
41template <typename K, int N>
42void MapTest() {
43 typedef TestAllocator<K> TestAlloc;
44 BtreeTest<safe_btree_map<K, K, std::less<K>, std::allocator<K>, N>, std::map<K, K> >();
45 BtreeAllocatorTest<safe_btree_map<K, K, std::less<K>, TestAlloc, N> >();
46 BtreeMapTest<safe_btree_map<K, K, std::less<K>, std::allocator<K>, N> >();
47}
48
49TEST(SafeBtree, set_int32_32) { SetTest<int32_t, 32>(); }
50TEST(SafeBtree, set_int32_64) { SetTest<int32_t, 64>(); }
51TEST(SafeBtree, set_int32_128) { SetTest<int32_t, 128>(); }
52TEST(SafeBtree, set_int32_256) { SetTest<int32_t, 256>(); }
53TEST(SafeBtree, set_int64_256) { SetTest<int64_t, 256>(); }
54TEST(SafeBtree, set_string_256) { SetTest<std::string, 256>(); }
55TEST(SafeBtree, set_pair_256) { SetTest<std::pair<int, int>, 256>(); }
56TEST(SafeBtree, map_int32_256) { MapTest<int32_t, 256>(); }
57TEST(SafeBtree, map_int64_256) { MapTest<int64_t, 256>(); }
58TEST(SafeBtree, map_string_256) { MapTest<std::string, 256>(); }
59TEST(SafeBtree, map_pair_256) { MapTest<std::pair<int, int>, 256>(); }
60
61TEST(SafeBtree, Comparison) {
62 const int kSetSize = 1201;
63 safe_btree_set<int64_t> my_set;
64 for (int i = 0; i < kSetSize; ++i) {
65 my_set.insert(i);
66 }
67 safe_btree_set<int64_t> my_set_copy(my_set);
68 EXPECT_TRUE(my_set_copy == my_set);
69 EXPECT_TRUE(my_set == my_set_copy);
70 EXPECT_FALSE(my_set_copy != my_set);
71 EXPECT_FALSE(my_set != my_set_copy);
72
73 my_set.insert(kSetSize);
74 EXPECT_FALSE(my_set_copy == my_set);
75 EXPECT_FALSE(my_set == my_set_copy);
76 EXPECT_TRUE(my_set_copy != my_set);
77 EXPECT_TRUE(my_set != my_set_copy);
78
79 my_set.erase(kSetSize - 1);
80 EXPECT_FALSE(my_set_copy == my_set);
81 EXPECT_FALSE(my_set == my_set_copy);
82 EXPECT_TRUE(my_set_copy != my_set);
83 EXPECT_TRUE(my_set != my_set_copy);
84
85 safe_btree_map<std::string, int64_t> my_map;
86 for (int i = 0; i < kSetSize; ++i) {
87 my_map[std::string(i, 'a')] = i;
88 }
89 safe_btree_map<std::string, int64_t> my_map_copy(my_map);
90 EXPECT_TRUE(my_map_copy == my_map);
91 EXPECT_TRUE(my_map == my_map_copy);
92 EXPECT_FALSE(my_map_copy != my_map);
93 EXPECT_FALSE(my_map != my_map_copy);
94
95 ++my_map_copy[std::string(7, 'a')];
96 EXPECT_FALSE(my_map_copy == my_map);
97 EXPECT_FALSE(my_map == my_map_copy);
98 EXPECT_TRUE(my_map_copy != my_map);
99 EXPECT_TRUE(my_map != my_map_copy);
100
101 my_map_copy = my_map;
102 my_map["hello"] = kSetSize;
103 EXPECT_FALSE(my_map_copy == my_map);
104 EXPECT_FALSE(my_map == my_map_copy);
105 EXPECT_TRUE(my_map_copy != my_map);
106 EXPECT_TRUE(my_map != my_map_copy);
107
108 my_map.erase(std::string(kSetSize - 1, 'a'));
109 EXPECT_FALSE(my_map_copy == my_map);
110 EXPECT_FALSE(my_map == my_map_copy);
111 EXPECT_TRUE(my_map_copy != my_map);
112 EXPECT_TRUE(my_map != my_map_copy);
113}
114
115} // namespace
116} // namespace btree
diff --git a/xdelta3/examples/checksum_test.cc b/xdelta3/examples/checksum_test.cc
deleted file mode 100644
index 028922b..0000000
--- a/xdelta3/examples/checksum_test.cc
+++ /dev/null
@@ -1,732 +0,0 @@
1/* Copyright (C) 2007 Josh MacDonald */
2
3extern "C" {
4#include "test.h"
5#include <assert.h>
6}
7
8#include <list>
9#include <vector>
10#include <map>
11#include <algorithm>
12
13using std::list;
14using std::map;
15using std::vector;
16
// MLCG parameters.
// Entries are listed as "a, a*" pairs, per the original annotation.
uint32_t good_32bit_values[] = {
  1597334677U,  // ...
  741103597U,
  887987685U,
};

// 64-bit counterparts, same "a, a*" layout.
uint64_t good_64bit_values[] = {
  1181783497276652981ULL,
  4292484099903637661ULL,
  7664345821815920749ULL,  // ...
};
29
// Empty tag types.
struct true_type {};
struct false_type {};
32
// bitsof<Word>() returns the width of Word in bits. Only the two explicit
// specializations below are defined; the primary template is declared only.
template <typename Word>
int bitsof();

// 32-bit words.
template <>
int bitsof<uint32_t>() { return 32; }

// 64-bit words.
template <>
int bitsof<uint64_t>() { return 64; }
45
// Identity byte permutation: returns the input byte unchanged, widened to
// int.
struct plain {
  int operator()(const uint8_t &c) {
    return static_cast<int>(c);
  }
};
51
// Hash functor for checksum values.
//
// Takes the bits of t above position h as the hash value; these are the most
// "distant" in terms of the spectral test for the rabin_karp MLCG. For short
// windows the high bits alone aren't enough, so the bits of t selected by
// "mask" are XORed in as well.
template <typename Word>
struct hhash {
  Word operator()(const Word& t, const int &h, const int &mask) {
    const Word high_part = t >> h;
    const Word masked_part = t & mask;
    return high_part ^ masked_part;
  }
};
61
62template <typename Word>
63Word good_word();
64
65template<>
66uint32_t good_word<uint32_t>() {
67 return good_32bit_values[0];
68}
69
70template<>
71uint64_t good_word<uint64_t>() {
72 return good_64bit_values[0];
73}
74
75// CLASSES
76
// SELF is the template-argument list matching the parameters that MEMBER
// declares. (No use of SELF is visible in this part of the file.)
77#define SELF Word, CksumSize, CksumSkip, Permute, Hash, Compaction
// MEMBER expands to the common template header shared by the checksum
// classes; it is written on the line before each struct definition.
78#define MEMBER template <typename Word, \
79 int CksumSize, \
80 int CksumSkip, \
81 typename Permute, \
82 typename Hash, \
83 int Compaction>
84
85MEMBER
86struct cksum_params {
87 typedef Word word_type;
88 typedef Permute permute_type;
89 typedef Hash hash_type;
90
91 enum { cksum_size = CksumSize,
92 cksum_skip = CksumSkip,
93 compaction = Compaction,
94 };
95};
96
97
98MEMBER
99struct rabin_karp {
100 typedef Word word_type;
101 typedef Permute permute_type;
102 typedef Hash hash_type;
103
104 enum { cksum_size = CksumSize,
105 cksum_skip = CksumSkip,
106 compaction = Compaction,
107 };
108
109 // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ...
110 rabin_karp() {
111 multiplier = good_word<Word>();
112 powers = new Word[cksum_size];
113 powers[cksum_size - 1] = 1;
114 for (int i = cksum_size - 2; i >= 0; i--) {
115 powers[i] = powers[i + 1] * multiplier;
116 }
117 product = powers[0] * multiplier;
118 }
119
120 ~rabin_karp() {
121 delete [] powers;
122 }
123
124 Word step(const uint8_t *ptr) {
125 Word h = 0;
126 for (int i = 0; i < cksum_size; i++) {
127 h += permute_type()(ptr[i]) * powers[i];
128 }
129 return h;
130 }
131
132 Word state0(const uint8_t *ptr) {
133 incr_state = step(ptr);
134 return incr_state;
135 }
136
137 Word incr(const uint8_t *ptr) {
138 incr_state = multiplier * incr_state -
139 product * permute_type()(ptr[-1]) +
140 permute_type()(ptr[cksum_size - 1]);
141 return incr_state;
142 }
143
144 Word *powers;
145 Word product;
146 Word multiplier;
147 Word incr_state;
148};
149
150MEMBER
151struct adler32_cksum {
152 typedef Word word_type;
153 typedef Permute permute_type;
154 typedef Hash hash_type;
155
156 enum { cksum_size = CksumSize,
157 cksum_skip = CksumSkip,
158 compaction = Compaction,
159 };
160
161 Word step(const uint8_t *ptr) {
162 return xd3_lcksum (ptr, cksum_size);
163 }
164
165 Word state0(const uint8_t *ptr) {
166 incr_state = step(ptr);
167 return incr_state;
168 }
169
170 Word incr(const uint8_t *ptr) {
171 incr_state = xd3_large_cksum_update (incr_state, ptr - 1, cksum_size);
172 return incr_state;
173 }
174
175 Word incr_state;
176};
177
178// TESTS
179
// Collects uniqueness statistics for checksums computed over one input.
//
// For every checksum value seen, "table" keeps the list of window start
// pointers whose contents (cksum_size bytes) are pairwise distinct. From
// that it derives:
//   count         - number of update() calls,
//   unique        - number of distinct window contents seen,
//   unique_values - number of distinct checksum values (set by freeze()).
template <typename Word>
struct file_stats {
  typedef std::list<const uint8_t*> ptr_list;
  typedef Word word_type;
  typedef std::map<word_type, ptr_list> table_type;
  typedef typename table_type::iterator table_iterator;
  typedef typename ptr_list::iterator ptr_iterator;

  int cksum_size;
  int cksum_skip;
  int unique;
  int unique_values;
  int count;
  table_type table;

  // size: number of bytes compared per window; skip: stored but not used by
  // this struct itself.
  file_stats(int size, int skip)
    : cksum_size(size),
      cksum_skip(skip),
      unique(0),
      unique_values(0),
      count(0) {
  }

  // Clears all counters and the table; cksum_size/cksum_skip are kept.
  void reset() {
    unique = 0;
    unique_values = 0;
    count = 0;
    table.clear();
  }

  // Records one window starting at ptr whose checksum is word. The pointer
  // is stored, not the bytes, so the buffer must stay valid while the table
  // is in use.
  void update(const word_type &word, const uint8_t *ptr) {
    count++;

    // operator[] creates an empty list for a checksum value seen for the
    // first time, so a single lookup covers both the new and existing cases.
    ptr_list &pl = table[word];

    for (ptr_iterator p_i = pl.begin();
         p_i != pl.end();
         ++p_i) {
      if (memcmp(*p_i, ptr, cksum_size) == 0) {
        // Identical window contents already recorded: a duplicate.
        return;
      }
    }

    // New contents under this checksum value.
    unique++;
    pl.push_back(ptr);
  }

  // Captures the number of distinct checksum values and releases the table.
  void freeze() {
    unique_values = static_cast<int>(table.size());
    table.clear();
  }
};
238
struct test_result_base;

// File-local list of test_result_base pointers.
static std::vector<test_result_base*> all_tests;

// Abstract interface over a single checksum test configuration, so that
// differently parameterized tests can be stored and compared through one
// pointer type. Every operation is pure virtual.
struct test_result_base {
  virtual ~test_result_base() {}

  // Lifecycle and reporting.
  virtual void reset() = 0;
  virtual void print() = 0;
  virtual void get(const uint8_t* buf, const int buf_size, int iters) = 0;
  virtual void stat() = 0;

  // Per-run counters.
  virtual int count() = 0;
  virtual int dups() = 0;

  // Per-run derived ratios.
  virtual double uniqueness() = 0;
  virtual double fullness() = 0;
  virtual double collisions() = 0;
  virtual double coverage() = 0;
  virtual double compression() = 0;
  virtual double time() = 0;

  // Ranking.
  virtual double score() = 0;
  virtual void set_score(double min_dups_frac, double min_time) = 0;

  // Totals accumulated across runs.
  virtual double total_time() = 0;
  virtual int total_count() = 0;
  virtual int total_dups() = 0;
};
264
265struct compare_h {
266 bool operator()(test_result_base *a,
267 test_result_base *b) {
268 return a->score() < b->score();
269 }
270};
271
272MEMBER
273struct test_result : public test_result_base {
274 typedef Word word_type;
275 typedef Permute permute_type;
276 typedef Hash hash_type;
277
278 enum { cksum_size = CksumSize,
279 cksum_skip = CksumSkip,
280 compaction = Compaction,
281 };
282
283 const char *test_name;
284 file_stats<Word> fstats;
285 int test_size;
286 int n_steps;
287 int n_incrs;
288 int s_bits;
289 int s_mask;
290 int t_entries;
291 int h_bits;
292 int h_buckets_full;
293 double h_score;
294 char *hash_table;
295 long accum_millis;
296 int accum_iters;
297
298 // These are not reset
299 double accum_time;
300 int accum_count;
301 int accum_dups;
302 int accum_colls;
303 int accum_size;
304
305 test_result(const char *name)
306 : test_name(name),
307 fstats(cksum_size, cksum_skip),
308 hash_table(NULL),
309 accum_millis(0),
310 accum_iters(0),
311 accum_time(0.0),
312 accum_count(0),
313 accum_dups(0),
314 accum_colls(0),
315 accum_size(0) {
316 all_tests.push_back(this);
317 }
318
319 ~test_result() {
320 reset();
321 }
322
323 void reset() {
324 // size of file
325 test_size = -1;
326
327 // count
328 n_steps = -1;
329 n_incrs = -1;
330
331 // four values used by new_table()/summarize_table()
332 s_bits = -1;
333 s_mask = -1;
334 t_entries = -1;
335 h_bits = -1;
336 h_buckets_full = -1;
337
338 accum_millis = 0;
339 accum_iters = 0;
340
341 fstats.reset();
342
343 // temporary
344 if (hash_table) {
345 delete(hash_table);
346 hash_table = NULL;
347 }
348 }
349
350 int count() {
351 if (cksum_skip == 1) {
352 return n_incrs;
353 } else {
354 return n_steps;
355 }
356 }
357
358 int dups() {
359 return fstats.count - fstats.unique;
360 }
361
362 int colls() {
363 return fstats.unique - fstats.unique_values;
364 }
365
366 double uniqueness() {
367 return 1.0 - (double) dups() / count();
368 }
369
370 double fullness() {
371 return (double) h_buckets_full / (1 << h_bits);
372 }
373
374 double collisions() {
375 return (double) colls() / fstats.unique;
376 }
377
378 double coverage() {
379 return (double) h_buckets_full / uniqueness() / count();
380 }
381
382 double compression() {
383 return 1.0 - coverage();
384 }
385
386 double time() {
387 return (double) accum_millis / accum_iters;
388 }
389
390 double score() {
391 return h_score;
392 }
393
394 void set_score(double min_compression, double min_time) {
395 h_score = (compression() - 0.99 * min_compression)
396 * (time() - 0.99 * min_time);
397 }
398
399 double total_time() {
400 return accum_time;
401 }
402
403 int total_count() {
404 return accum_count;
405 }
406
407 int total_dups() {
408 return accum_dups;
409 }
410
411 int total_colls() {
412 return accum_dups;
413 }
414
415 void stat() {
416 accum_time += time();
417 accum_count += count();
418 accum_dups += dups();
419 accum_colls += colls();
420 accum_size += test_size;
421 }
422
423 void print() {
424 if (fstats.count != count()) {
425 fprintf(stderr, "internal error: %d != %d\n", fstats.count, count());
426 abort();
427 }
428 printf("%s: (%u#%u) count %u uniq %0.2f%% full %u (%0.4f%% coll %0.4f%%) covers %0.2f%% w/ 2^%d @ %.4f MB/s %u iters\n",
429 test_name,
430 cksum_size,
431 cksum_skip,
432 count(),
433 100.0 * uniqueness(),
434 h_buckets_full,
435 100.0 * fullness(),
436 100.0 * collisions(),
437 100.0 * coverage(),
438 h_bits,
439 0.001 * accum_iters * test_size / accum_millis,
440 accum_iters);
441 }
442
443 int size_log2 (int slots)
444 {
445 int bits = bitsof<word_type>() - 1;
446 int i;
447
448 for (i = 3; i <= bits; i += 1) {
449 if (slots <= (1 << i)) {
450 return i - compaction;
451 }
452 }
453
454 return bits;
455 }
456
457 void new_table(int entries) {
458 t_entries = entries;
459 h_bits = size_log2(entries);
460
461 int n = 1 << h_bits;
462
463 s_bits = bitsof<word_type>() - h_bits;
464 s_mask = n - 1;
465
466 hash_table = new char[n / 8];
467 memset(hash_table, 0, n / 8);
468 }
469
470 int get_table_bit(int i) {
471 return hash_table[i/8] & (1 << i%8);
472 }
473
474 int set_table_bit(int i) {
475 return hash_table[i/8] |= (1 << i%8);
476 }
477
478 void summarize_table() {
479 int n = 1 << h_bits;
480 int f = 0;
481 for (int i = 0; i < n; i++) {
482 if (get_table_bit(i)) {
483 f++;
484 }
485 }
486 h_buckets_full = f;
487 }
488
489 void get(const uint8_t* buf, const int buf_size, int test_iters) {
490 rabin_karp<SELF> test;
491 //adler32_cksum<SELF> test;
492 hash_type hash;
493 const uint8_t *ptr;
494 const uint8_t *end;
495 int last_offset;
496 int periods;
497 int stop;
498
499 test_size = buf_size;
500 last_offset = buf_size - cksum_size;
501
502 if (last_offset < 0) {
503 periods = 0;
504 n_steps = 0;
505 n_incrs = 0;
506 stop = -cksum_size;
507 } else {
508 periods = last_offset / cksum_skip;
509 n_steps = periods + 1;
510 n_incrs = last_offset + 1;
511 stop = last_offset - (periods + 1) * cksum_skip;
512 }
513
514 // Compute file stats once.
515 if (fstats.unique_values == 0) {
516 if (cksum_skip == 1) {
517 for (int i = 0; i <= buf_size - cksum_size; i++) {
518 fstats.update(hash(test.step(buf + i), s_bits, s_mask), buf + i);
519 }
520 } else {
521 ptr = buf + last_offset;
522 end = buf + stop;
523
524 for (; ptr != end; ptr -= cksum_skip) {
525 fstats.update(hash(test.step(ptr), s_bits, s_mask), ptr);
526 }
527 }
528 fstats.freeze();
529 }
530
531 long start_test = get_millisecs_now();
532
533 if (cksum_skip != 1) {
534 new_table(n_steps);
535
536 for (int i = 0; i < test_iters; i++) {
537 ptr = buf + last_offset;
538 end = buf + stop;
539
540 for (; ptr != end; ptr -= cksum_skip) {
541 set_table_bit(hash(test.step(ptr), s_bits, s_mask));
542 }
543 }
544
545 summarize_table();
546 }
547
548 stop = buf_size - cksum_size + 1;
549 if (stop < 0) {
550 stop = 0;
551 }
552
553 if (cksum_skip == 1) {
554
555 new_table(n_incrs);
556
557 for (int i = 0; i < test_iters; i++) {
558 ptr = buf;
559 end = buf + stop;
560
561 if (ptr != end) {
562 set_table_bit(hash(test.state0(ptr++), s_bits, s_mask));
563 }
564
565 for (; ptr != end; ptr++) {
566 Word w = test.incr(ptr);
567 assert(w == test.step(ptr));
568 set_table_bit(hash(w, s_bits, s_mask));
569 }
570 }
571
572 summarize_table();
573 }
574
575 accum_iters += test_iters;
576 accum_millis += get_millisecs_now() - start_test;
577 }
578};
579
580template <typename Word>
581void print_array(const char *tname) {
582 printf("static const %s hash_multiplier[64] = {\n", tname);
583 Word p = 1;
584 for (int i = 0; i < 64; i++) {
585 printf(" %uU,\n", p);
586 p *= good_word<Word>();
587 }
588 printf("};\n", tname);
589}
590
591int main(int argc, char** argv) {
592 int i;
593 uint8_t *buf = NULL;
594 size_t buf_len = 0;
595 int ret;
596
597 if (argc <= 1) {
598 fprintf(stderr, "usage: %s file ...\n", argv[0]);
599 return 1;
600 }
601
602 //print_array<uint32_t>("uint32_t");
603
604#define TEST(T,Z,S,P,H,C) test_result<T,Z,S,P,H<T>,C> \
605 _ ## T ## _ ## Z ## _ ## S ## _ ## P ## _ ## H ## _ ## C \
606 (#T "_" #Z "_" #S "_" #P "_" #H "_" #C)
607
608#if 0
609
610 TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \
611 TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \
612 TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \
613 TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \
614
615#endif
616
617#define TESTS(SKIP) \
618 TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \
619 TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \
620 TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \
621 TEST(uint32_t, 9, SKIP, plain, hhash, 3)
622
623#define TESTS_ALL(SKIP) \
624 TEST(uint32_t, 3, SKIP, plain, hhash, 0); \
625 TEST(uint32_t, 3, SKIP, plain, hhash, 1); \
626 TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \
627 TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \
628 TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \
629 TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \
630 TEST(uint32_t, 5, SKIP, plain, hhash, 0); \
631 TEST(uint32_t, 5, SKIP, plain, hhash, 1); \
632 TEST(uint32_t, 8, SKIP, plain, hhash, 0); \
633 TEST(uint32_t, 8, SKIP, plain, hhash, 1); \
634 TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \
635 TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \
636 TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \
637 TEST(uint32_t, 9, SKIP, plain, hhash, 3); /* x */ \
638 TEST(uint32_t, 11, SKIP, plain, hhash, 0); /* x */ \
639 TEST(uint32_t, 11, SKIP, plain, hhash, 1); /* x */ \
640 TEST(uint32_t, 13, SKIP, plain, hhash, 0); \
641 TEST(uint32_t, 13, SKIP, plain, hhash, 1); \
642 TEST(uint32_t, 15, SKIP, plain, hhash, 0); /* x */ \
643 TEST(uint32_t, 15, SKIP, plain, hhash, 1); /* x */ \
644 TEST(uint32_t, 16, SKIP, plain, hhash, 0); /* x */ \
645 TEST(uint32_t, 16, SKIP, plain, hhash, 1); /* x */ \
646 TEST(uint32_t, 21, SKIP, plain, hhash, 0); \
647 TEST(uint32_t, 21, SKIP, plain, hhash, 1); \
648 TEST(uint32_t, 34, SKIP, plain, hhash, 0); \
649 TEST(uint32_t, 34, SKIP, plain, hhash, 1); \
650 TEST(uint32_t, 55, SKIP, plain, hhash, 0); \
651 TEST(uint32_t, 55, SKIP, plain, hhash, 1)
652
653 TESTS(1); // *
654// TESTS(2); // *
655// TESTS(3); // *
656// TESTS(5); // *
657// TESTS(8); // *
658// TESTS(9);
659// TESTS(11);
660// TESTS(13); // *
661 TESTS(15);
662// TESTS(16);
663// TESTS(21); // *
664// TESTS(34); // *
665// TESTS(55); // *
666// TESTS(89); // *
667
668 for (i = 1; i < argc; i++) {
669 if ((ret = read_whole_file(argv[i],
670 & buf,
671 & buf_len))) {
672 return 1;
673 }
674
675 fprintf(stderr, "file %s is %zu bytes\n",
676 argv[i], buf_len);
677
678 double min_time = -1.0;
679 double min_compression = 0.0;
680
681 for (vector<test_result_base*>::iterator i = all_tests.begin();
682 i != all_tests.end(); ++i) {
683 test_result_base *test = *i;
684 test->reset();
685
686 int iters = 100;
687 long start_test = get_millisecs_now();
688
689 do {
690 test->get(buf, buf_len, iters);
691 iters *= 3;
692 iters /= 2;
693 } while (get_millisecs_now() - start_test < 2000);
694
695 test->stat();
696
697 if (min_time < 0.0) {
698 min_compression = test->compression();
699 min_time = test->time();
700 }
701
702 if (min_time > test->time()) {
703 min_time = test->time();
704 }
705
706 if (min_compression > test->compression()) {
707 min_compression = test->compression();
708 }
709
710 test->print();
711 }
712
713// for (vector<test_result_base*>::iterator i = all_tests.begin();
714// i != all_tests.end(); ++i) {
715// test_result_base *test = *i;
716// test->set_score(min_compression, min_time);
717// }
718
719// sort(all_tests.begin(), all_tests.end(), compare_h());
720
721// for (vector<test_result_base*>::iterator i = all_tests.begin();
722// i != all_tests.end(); ++i) {
723// test_result_base *test = *i;
724// test->print();
725// }
726
727 free(buf);
728 buf = NULL;
729 }
730
731 return 0;
732}
diff --git a/xdelta3/go/src/regtest.go b/xdelta3/go/src/regtest.go
new file mode 100644
index 0000000..4ffcdaf
--- /dev/null
+++ b/xdelta3/go/src/regtest.go
@@ -0,0 +1,274 @@
1package main
2
3import (
4 "fmt"
5 "io"
6 "path"
7 "os"
8 "sort"
9 "time"
10
11 "xdelta"
12)
13
// Hard-coded inputs for the regression run (the TODO in NewC asks for these
// to become flags).
14const (
// Directory listed by datasetTest; its regular files are the test inputs.
15 xdataset = "/volume/home/jmacd/src/testdata"
// Binary that main passes to datasetTest as the reference program.
16 xcompare = "/volume/home/jmacd/src/xdelta-devel/xdelta3/xdelta3"
// Binary under test, used by every test that main runs.
17 xdelta3 = "/volume/home/jmacd/src/xdelta-64bithash/xdelta3/xdelta3"
// Seed handed to WriteRstreams by offsetTest to generate the input streams.
18 seed = 1422253499919909358
19)
20
// Config holds the size parameters shared by the tests.  srcbuf_size and
// window_size are passed to the program as its -B and -W options.
type Config struct {
	srcbuf_size int64
	window_size int64
	blocksize   int
}

// NewC returns the default configuration: a 2^26 source buffer, a 2^22
// window and a 2^16 block size.
func NewC() Config {
	// TODO make these (and above) flags
	return Config{
		srcbuf_size: 1 << 26,
		window_size: 1 << 22,
		blocksize:   1 << 16,
	}
}
31
32func (c Config) smokeTest(t *xdelta.TestGroup, p xdelta.Program) {
33 target := "Hello world!"
34 source := "Hello world, nice to meet you!"
35
36 enc, err := t.Exec("encode", p, true, []string{"-e"})
37 if err != nil {
38 t.Panic(err)
39 }
40 dec, err := t.Exec("decode", p, true, []string{"-d"})
41 if err != nil {
42 t.Panic(err)
43 }
44
45 encodeout := t.Drain(enc.Stdout, "encode.stdout")
46 decodeout := t.Drain(dec.Stdout, "decode.stdout")
47
48 t.Empty(enc.Stderr, "encode")
49 t.Empty(dec.Stderr, "decode")
50
51 t.TestWrite("encode.stdin", enc.Stdin, []byte(target))
52 t.TestWrite("encode.srcin", enc.Srcin, []byte(source))
53
54 t.TestWrite("decode.stdin", dec.Stdin, <-encodeout)
55 t.TestWrite("decode.srcin", dec.Srcin, []byte(source))
56
57 if do := string(<-decodeout); do != target {
58 t.Panic(fmt.Errorf("It's not working! %s\n!=\n%s\n", do, target))
59 }
60 t.Wait(enc, dec)
61}
62
// PairTest describes one encode/decode round trip of a (source, target) file
// pair with a given program and configuration, and embeds the measurements
// that datasetPairTest fills in.
63type PairTest struct {
64 // Input
65 Config
66 program xdelta.Program
// File paths: source is passed with -s, target is the file being encoded.
67 source, target string
68
69 // Output
70 TestOutput
71}
72
73type TestOutput struct {
74 encoded int64
75 encDuration time.Duration
76 decDuration time.Duration
77 encSysDuration time.Duration
78 decSysDuration time.Duration
79}
80
81func (to *TestOutput) Add(a TestOutput) {
82 to.encoded += a.encoded
83 to.encDuration += a.encDuration
84 to.decDuration += a.decDuration
85 to.encSysDuration += a.encSysDuration
86 to.decSysDuration += a.decSysDuration
87}
88
89func (to *TestOutput) String() string {
90 return fmt.Sprintf("SIZE: %v\tT: %v\tTSYS: %v\tDT: %v\tDTSYS: %v",
91 to.encoded, to.encDuration, to.encSysDuration, to.decDuration, to.encSysDuration)
92}
93
94// P is the test program, Q is the reference version.
95func (cfg Config) datasetTest(t *xdelta.TestGroup, p, q xdelta.Program) {
96 dir, err := os.Open(xdataset)
97 if err != nil {
98 t.Panic(err)
99 }
100 dents, err := dir.Readdir(-1)
101 if err != nil {
102 t.Panic(err)
103 }
104 paths := make([]string, len(dents))
105 var total int64
106 for i, d := range dents {
107 if !d.Mode().IsRegular() {
108 continue
109 }
110 paths[i] = fmt.Sprint(xdataset, "/", d.Name())
111 total += d.Size()
112 }
113 meansize := total / int64(len(dents))
114 largest := uint(20)
115 for ; largest <= 31 && 1<<largest < meansize; largest++ {}
116
117 sort.Strings(paths)
118
119 testSum := map[uint]*TestOutput{}
120 compSum := map[uint]*TestOutput{}
121
122 for _, in1 := range paths {
123 for _, in2 := range paths {
124 if in1 == in2 { continue }
125
126 // 1/4, 1/2, and 1 of the power-of-2 rounded-up mean size
127 for b := largest - 2; b <= largest; b++ {
128 if _, has := testSum[b]; !has {
129 testSum[b] = &TestOutput{}
130 compSum[b] = &TestOutput{}
131 }
132 c1 := cfg
133 c1.srcbuf_size = 1<<b
134 ptest := &PairTest{c1, p, in1, in2, TestOutput{-1, 0, 0, 0, 0}}
135 ptest.datasetPairTest(t, 1<<b);
136 qtest := &PairTest{c1, q, in1, in2, TestOutput{-1, 0, 0, 0, 0}}
137 qtest.datasetPairTest(t, 1<<b)
138
139 testSum[b].Add(ptest.TestOutput)
140 compSum[b].Add(qtest.TestOutput)
141
142 fmt.Printf("%s, %s: %.2f%% %+d/%d\n\tE:%.2f%%/%s(%.2f%%/%s) D:%.2f%%/%s(%.2f%%/%s) [B=%d]\n",
143 path.Base(in1), path.Base(in2),
144 float64(ptest.encoded - qtest.encoded) * 100.0 / float64(qtest.encoded),
145 ptest.encoded - qtest.encoded,
146 qtest.encoded,
147 (ptest.encDuration - qtest.encDuration).Seconds() * 100.0 / qtest.encDuration.Seconds(),
148 qtest.encDuration,
149 (ptest.decDuration - qtest.decDuration).Seconds() * 100.0 / qtest.decDuration.Seconds(),
150 qtest.encDuration,
151 (ptest.encSysDuration - qtest.encSysDuration).Seconds() * 100.0 / qtest.encSysDuration.Seconds(),
152 qtest.encSysDuration,
153 (ptest.decSysDuration - qtest.decSysDuration).Seconds() * 100.0 / qtest.decSysDuration.Seconds(),
154 qtest.decSysDuration,
155 1<<b)
156 }
157 }
158 }
159 var keys []uint
160 for k, _ := range testSum {
161 keys = append(keys, k)
162 }
163 for _, k := range keys {
164 fmt.Printf("B=%v\nTEST: %v\nCOMP: %v\n", 1<<k, testSum[k], compSum[k])
165 }
166}
167
168func (pt *PairTest) datasetPairTest(t *xdelta.TestGroup, meanSize int64) {
169 cfg := pt.Config
170 eargs := []string{"-e", fmt.Sprint("-B", cfg.srcbuf_size), // "-q",
171 fmt.Sprint("-W", cfg.window_size), "-s", pt.source,
172 "-I0", "-S", "none", pt.target}
173 enc, err := t.Exec("encode", pt.program, false, eargs)
174 if err != nil {
175 t.Panic(err)
176 }
177
178 dargs := []string{"-dc", fmt.Sprint("-B", cfg.srcbuf_size), //"-q",
179 fmt.Sprint("-W", cfg.window_size), "-s", pt.source,
180 "-S", "none"}
181
182 dec, err := t.Exec("decode", pt.program, false, dargs)
183 if err != nil {
184 t.Panic(err)
185 }
186 tgt_check, err := os.Open(pt.target)
187 if err != nil {
188 t.Panic(err)
189 }
190 tgt_info, err := tgt_check.Stat()
191 if err != nil {
192 t.Panic(err)
193 }
194 t.Empty(enc.Stderr, "encode")
195 t.Empty(dec.Stderr, "decode")
196 t.CopyStreams(enc.Stdout, dec.Stdin, &pt.encoded)
197 t.CompareStreams(dec.Stdout, tgt_check, tgt_info.Size())
198
199 t.Wait(enc, dec)
200
201 pt.decDuration = dec.Cmd.ProcessState.UserTime()
202 pt.encDuration = enc.Cmd.ProcessState.UserTime()
203 pt.decSysDuration = dec.Cmd.ProcessState.SystemTime()
204 pt.encSysDuration = enc.Cmd.ProcessState.SystemTime()
205}
206
207func (cfg Config) offsetTest(t *xdelta.TestGroup, p xdelta.Program, offset, length int64) {
208 eargs := []string{"-e", "-0", fmt.Sprint("-B", cfg.srcbuf_size), "-q",
209 fmt.Sprint("-W", cfg.window_size)}
210 enc, err := t.Exec("encode", p, true, eargs)
211 if err != nil {
212 t.Panic(err)
213 }
214
215 dargs := []string{"-d", fmt.Sprint("-B", cfg.srcbuf_size), "-q",
216 fmt.Sprint("-W", cfg.window_size)}
217 dec, err := t.Exec("decode", p, true, dargs)
218 if err != nil {
219 t.Panic(err)
220 }
221
222 // The pipe used to read the decoder output and compare
223 // against the target.
224 read, write := io.Pipe()
225
226 t.Empty(enc.Stderr, "encode")
227 t.Empty(dec.Stderr, "decode")
228
229 var encoded_size int64
230 t.CopyStreams(enc.Stdout, dec.Stdin, &encoded_size)
231 t.CompareStreams(dec.Stdout, read, length)
232
233 // The decoder output ("read", above) is compared with the
234 // test-provided output ("write", below). The following
235 // generates two identical inputs.
236 t.WriteRstreams("encode", seed, offset, length, enc.Srcin, enc.Stdin)
237 t.WriteRstreams("decode", seed, offset, length, dec.Srcin, write)
238 t.Wait(enc, dec)
239
240 expect := cfg.srcbuf_size - offset
241 if float64(encoded_size) < (0.95 * float64(expect)) ||
242 float64(encoded_size) > (1.05 * float64(expect)) {
243 t.Fail("encoded size should be ~=", expect, ", actual ", encoded_size)
244 }
245}
246
247func main() {
248 r, err := xdelta.NewRunner()
249 if err != nil {
250 panic(err)
251 }
252 defer r.Cleanup()
253
254 cfg := NewC()
255
256 prog := xdelta.Program{xdelta3}
257
258 r.RunTest("smoketest", func(t *xdelta.TestGroup) { cfg.smokeTest(t, prog) })
259
260 for i := uint(29); i <= 33; i += 1 {
261 // The arguments to offsetTest are offset, source
262 // window size, and file size. The source window size
263 // is (2 << i) and (in the 3.0x release branch) is
264 // limited to 2^31, so the the greatest value of i is
265 // 30.
266 cfg.srcbuf_size = 2 << i
267 r.RunTest(fmt.Sprint("offset", i), func(t *xdelta.TestGroup) {
268 cfg.offsetTest(t, prog, 1 << i, 3 << i) })
269 }
270
271 comp := xdelta.Program{xcompare}
272
273 r.RunTest("dataset", func(t *xdelta.TestGroup) { cfg.datasetTest(t, prog, comp) })
274}
diff --git a/xdelta3/go/src/xdelta/rstream.go b/xdelta3/go/src/xdelta/rstream.go
new file mode 100644
index 0000000..99c3d17
--- /dev/null
+++ b/xdelta3/go/src/xdelta/rstream.go
@@ -0,0 +1,71 @@
1package xdelta
2
3
4import (
5 "io"
6 "math/rand"
7)
8
9const (
// Size in bytes of the buffers used by writeRand and CompareStreams.
10 blocksize = 1<<17
11)
12
13func (t *TestGroup) WriteRstreams(desc string, seed, offset, len int64,
14 src, tgt io.WriteCloser) {
15 t.Go("src-write:"+desc, func (g *Goroutine) {
16 writeOne(g, seed, 0, len, tgt, false)
17 })
18 t.Go("tgt-write:"+desc, func (g *Goroutine) {
19 writeOne(g, seed, offset, len, src, true)
20 })
21}
22
23func writeOne(g *Goroutine, seed, offset, len int64, stream io.WriteCloser, readall bool) {
24 if !readall {
25 // Allow the source-read to fail or block until the process terminates.
26 // This behavior is reserved for the decoder, which is not required to
27 // read the entire source.
28 g.OK()
29 }
30 if offset != 0 {
31 // Fill with other random data until the offset
32 if err := writeRand(g, rand.New(rand.NewSource(^seed)), offset, stream); err != nil {
33 g.Panic(err)
34 }
35 }
36 if err := writeRand(g, rand.New(rand.NewSource(seed)),
37 len - offset, stream); err != nil {
38 g.Panic(err)
39 }
40 if err := stream.Close(); err != nil {
41 g.Panic(err)
42 }
43 g.OK()
44}
45
46func writeRand(g *Goroutine, r *rand.Rand, len int64, s io.Writer) error {
47 blk := make([]byte, blocksize)
48 for len > 0 {
49 fillRand(r, blk)
50 c := blocksize
51 if len < blocksize {
52 c = int(len)
53 }
54 if _, err := s.Write(blk[0:c]); err != nil {
55 return err
56 }
57 len -= int64(c)
58 }
59 return nil
60}
61
62func fillRand(r *rand.Rand, blk []byte) {
63 for p := 0; p < len(blk); {
64 v := r.Int63()
65 for i := 7; i != 0 && p < len(blk); i-- {
66 blk[p] = byte(v)
67 p++
68 v >>= 8
69 }
70 }
71}
diff --git a/xdelta3/go/src/xdelta/run.go b/xdelta3/go/src/xdelta/run.go
new file mode 100644
index 0000000..448fabe
--- /dev/null
+++ b/xdelta3/go/src/xdelta/run.go
@@ -0,0 +1,71 @@
1package xdelta
2
3import (
4 "fmt"
5 "io"
6 "io/ioutil"
7 "os"
8 "os/exec"
9)
10
// Program identifies the executable to run.
11type Program struct {
12 Path string
13}
14
// Run is one started process together with its pipes.  Srcfile and Srcin are
// only set when Exec was asked to create a source FIFO.
15type Run struct {
16 Cmd exec.Cmd
17 Srcfile string
18 Stdin io.WriteCloser
19 Srcin io.WriteCloser
20 Stdout io.ReadCloser
21 Stderr io.ReadCloser
22}
23
// Runner owns the temporary directory created by NewRunner and removed by
// Cleanup.
24type Runner struct {
25 Testdir string
26}
27
28func (r *Run) Wait() error {
29 return r.Cmd.Wait()
30}
31
32func NewRunner() (*Runner, error) {
33 if dir, err := ioutil.TempDir(tmpDir, "xrt"); err != nil {
34 return nil, err
35 } else {
36 return &Runner{dir}, nil
37 }
38}
39
40func (r *Runner) newTestGroup(name string) (*TestGroup) {
41 tg := &TestGroup{Runner: r}
42 tg.WaitGroup.Add(1)
43 g0 := &Goroutine{tg, name, false}
44 tg.running = append(tg.running, g0)
45 tg.main = g0
46 return tg
47}
48
49func (r *Runner) Cleanup() {
50 os.RemoveAll(r.Testdir)
51}
52
53func (r *Runner) RunTest(name string, f func (t *TestGroup)) {
54 t := r.newTestGroup(name)
55 c := make(chan interface{})
56 go func() {
57 defer func() {
58 rec := recover()
59 c <- rec
60 }()
61 fmt.Println("Testing", name, "...")
62 f(t)
63 c <- nil
64 }()
65 rec := <- c
66 if t.errors == nil && rec == nil {
67 fmt.Println("Success:", name)
68 } else {
69 fmt.Println("FAILED:", name, t.errors, rec)
70 }
71}
diff --git a/xdelta3/go/src/xdelta/test.go b/xdelta3/go/src/xdelta/test.go
new file mode 100644
index 0000000..7210698
--- /dev/null
+++ b/xdelta3/go/src/xdelta/test.go
@@ -0,0 +1,164 @@
1package xdelta
2
3import (
4 "bufio"
5 "bytes"
6 "errors"
7 "fmt"
8 "io"
9 "io/ioutil"
10 "os"
11 "path"
12 "sync/atomic"
13
14 "golang.org/x/sys/unix"
15)
16
17var (
// Parent directory in which NewRunner creates its temporary directory.
18 tmpDir = "/tmp"
// Counter incremented atomically by Exec to give each source FIFO its own name.
19 srcSeq int64
20)
21
22func (t *TestGroup) Drain(f io.ReadCloser, desc string) <-chan []byte {
23 c := make(chan []byte)
24 t.Go(desc, func(g *Goroutine) {
25 if b, err := ioutil.ReadAll(f); err != nil {
26 g.Panic(err)
27 } else {
28 c <- b
29 }
30 g.OK()
31 })
32 return c
33}
34
35func (t *TestGroup) Empty(f io.ReadCloser, desc string) *Goroutine {
36 return t.Go("empty:"+desc, func (g *Goroutine) {
37 s := bufio.NewScanner(f)
38 for s.Scan() {
39 os.Stderr.Write([]byte(fmt.Sprint(desc, ": ", s.Text(), "\n")))
40 }
41 err := s.Err()
42 f.Close()
43 if err != nil {
44 g.Panic(err)
45 }
46 g.OK()
47 })
48}
49
50func (t *TestGroup) TestWrite(what string, f io.WriteCloser, b []byte) *Goroutine {
51 return t.Go("write", func(g *Goroutine) {
52 if _, err := f.Write(b); err != nil {
53 g.Panic(err)
54 }
55 if err := f.Close(); err != nil {
56 g.Panic(err)
57 }
58 g.OK()
59 })
60}
61
62func (t *TestGroup) CopyStreams(r io.ReadCloser, w io.WriteCloser, written *int64) *Goroutine {
63 return t.Go("copy", func(g *Goroutine) {
64 nwrite, err := io.Copy(w, r)
65 if err != nil {
66 g.Panic(err)
67 }
68 err = r.Close()
69 if err != nil {
70 g.Panic(err)
71 }
72 err = w.Close()
73 if err != nil {
74 g.Panic(err)
75 }
76 g.OK()
77 *written = nwrite
78 })
79}
80
81func (t *TestGroup) CompareStreams(r1 io.ReadCloser, r2 io.ReadCloser, length int64) *Goroutine {
82 return t.Go("compare", func(g *Goroutine) {
83 b1 := make([]byte, blocksize)
84 b2 := make([]byte, blocksize)
85 var idx int64
86 for length > 0 {
87 c := blocksize
88 if length < blocksize {
89 c = int(length)
90 }
91 if _, err := io.ReadFull(r1, b1[0:c]); err != nil {
92 g.Panic(err)
93 }
94 if _, err := io.ReadFull(r2, b2[0:c]); err != nil {
95 g.Panic(err)
96 }
97 if bytes.Compare(b1[0:c], b2[0:c]) != 0 {
98 fmt.Println("B1 is", string(b1[0:c]))
99 fmt.Println("B2 is", string(b2[0:c]))
100 g.Panic(errors.New(fmt.Sprint("Bytes do not compare at ", idx)))
101 }
102 length -= int64(c)
103 idx += int64(c)
104 }
105 g.OK()
106 })
107}
108
109func (t *TestGroup) Exec(desc string, p Program, srcfifo bool, flags []string) (*Run, error) {
110 var err error
111 run := &Run{}
112 args := []string{p.Path}
113 if srcfifo {
114 num := atomic.AddInt64(&srcSeq, 1)
115 run.Srcfile = path.Join(t.Runner.Testdir, fmt.Sprint("source", num))
116 if err = unix.Mkfifo(run.Srcfile, 0600); err != nil {
117 return nil, err
118 }
119 read, write := io.Pipe()
120 t.writeFifo(run.Srcfile, read)
121 run.Srcin = write
122 args = append(args, "-s")
123 args = append(args, run.Srcfile)
124 }
125 if run.Stdin, err = run.Cmd.StdinPipe(); err != nil {
126 return nil, err
127 }
128 if run.Stdout, err = run.Cmd.StdoutPipe(); err != nil {
129 return nil, err
130 }
131 if run.Stderr, err = run.Cmd.StderrPipe(); err != nil {
132 return nil, err
133 }
134
135 run.Cmd.Path = p.Path
136 run.Cmd.Args = append(args, flags...)
137 run.Cmd.Dir = t.Runner.Testdir
138 if serr := run.Cmd.Start(); serr != nil {
139 return nil, serr
140 }
141 return run, nil
142}
143
144func (t *TestGroup) Fail(v ...interface{}) {
145 panic(fmt.Sprintln(v...))
146}
147
148func (t *TestGroup) writeFifo(srcfile string, read io.Reader) *Goroutine {
149 return t.Go("compare", func(g *Goroutine) {
150 fifo, err := os.OpenFile(srcfile, os.O_WRONLY, 0600)
151 if err != nil {
152 fifo.Close()
153 g.Panic(err)
154 }
155 if _, err := io.Copy(fifo, read); err != nil {
156 fifo.Close()
157 g.Panic(err)
158 }
159 if err := fifo.Close(); err != nil {
160 g.Panic(err)
161 }
162 g.OK()
163 })
164}
diff --git a/xdelta3/go/src/xdelta/tgroup.go b/xdelta3/go/src/xdelta/tgroup.go
new file mode 100644
index 0000000..602b1e1
--- /dev/null
+++ b/xdelta3/go/src/xdelta/tgroup.go
@@ -0,0 +1,97 @@
1package xdelta
2
3import (
4 "fmt"
5 "runtime"
6 "sync"
7)
8
// TestGroup tracks the goroutines started for one test and collects their
// errors.  The embedded Mutex is taken by finish and Go around the slices
// below; the embedded WaitGroup counts goroutines that have not finished.
9type TestGroup struct {
10 *Runner
// Entry created by newTestGroup for the goroutine running the test body.
11 main *Goroutine
12 sync.Mutex
13 sync.WaitGroup
14 running []*Goroutine
// Errors from a goroutine's first finish call; these fail the test.
15 errors []error
16 nonerrors []error // For tolerated / expected conditions
17}
18
// Goroutine is the handle a test goroutine uses to report its outcome.
19type Goroutine struct {
20 *TestGroup
21 name string
// Set by the first finish call; later calls only record non-errors.
22 done bool
23}
24
25func (g *Goroutine) String() string {
26 return fmt.Sprint("[", g.name, "]")
27}
28
29func (g *Goroutine) finish(err error) {
30 wait := false
31 tg := g.TestGroup
32 sbuf := make([]byte, 4096)
33 sbuf = sbuf[0:runtime.Stack(sbuf, false)]
34 if err != nil {
35 err = fmt.Errorf("%v:%v:%v", g.name, err, string(sbuf))
36 }
37 tg.Lock()
38 if g.done {
39 if err != nil {
40 tg.nonerrors = append(tg.nonerrors, err)
41 }
42 } else {
43 wait = true
44 g.done = true
45 if err != nil {
46 tg.errors = append(tg.errors, err)
47 }
48 }
49 tg.Unlock()
50 if wait {
51 tg.WaitGroup.Done()
52 }
53}
54
55func (g *Goroutine) OK() {
56 g.finish(nil)
57}
58
59func (g *Goroutine) Panic(err error) {
60 g.finish(err)
61 if g != g.TestGroup.main {
62 runtime.Goexit()
63 }
64}
65
66func (t *TestGroup) Main() *Goroutine { return t.main }
67
68func (t *TestGroup) Panic(err error) { t.Main().Panic(err) }
69
70func (t *TestGroup) Go(name string, f func(*Goroutine)) *Goroutine {
71 g := &Goroutine{t, name, false}
72 t.Lock()
73 t.WaitGroup.Add(1)
74 t.running = append(t.running, g)
75 t.Unlock()
76 go f(g)
77 return g
78}
79
80func (t *TestGroup) Wait(procs... *Run) {
81 t.Main().OK()
82 t.WaitGroup.Wait()
83 for _, p := range procs {
84 if err := p.Wait(); err != nil {
85 t.errors = append(t.errors, err)
86 }
87 }
88 for _, err := range t.errors {
89 fmt.Println(":ERROR:", err)
90 }
91 for _, err := range t.nonerrors {
92 fmt.Println("(ERROR)", err)
93 }
94 if len(t.errors) != 0 {
95 t.Fail("Test failed with", len(t.errors), "errors")
96 }
97}
diff --git a/xdelta3/py-compile b/xdelta3/py-compile
deleted file mode 120000
index f90bf99..0000000
--- a/xdelta3/py-compile
+++ /dev/null
@@ -1 +0,0 @@
1/usr/local/share/automake-1.11/py-compile \ No newline at end of file
diff --git a/xdelta3/run_release.sh b/xdelta3/run_release.sh
index ffd50fb..eb08afc 100755
--- a/xdelta3/run_release.sh
+++ b/xdelta3/run_release.sh
@@ -46,6 +46,7 @@ function setup {
46 libtoolize 46 libtoolize
47 automake --add-missing 47 automake --add-missing
48 aclocal -I m4 48 aclocal -I m4
49 autoheader
49 automake 50 automake
50 autoheader 51 autoheader
51 autoconf 52 autoconf
@@ -101,9 +102,10 @@ function buildlzma {
101function buildit { 102function buildit {
102 local host=$1 103 local host=$1
103 local march=$2 104 local march=$2
104 local offsetbits=$3 105 local usizebits=$3
105 local cargs=$4 106 local offsetbits=$4
106 local afl=$5 107 local cargs=$5
108 local afl=$6
107 local BM="${host}${march}" 109 local BM="${host}${march}"
108 local USECC="${CC}" 110 local USECC="${CC}"
109 local USECXX="${CXX}" 111 local USECXX="${CXX}"
@@ -115,8 +117,9 @@ function buildit {
115 BM="${BM}-afl" 117 BM="${BM}-afl"
116 fi 118 fi
117 119
118 local D="build/${BM}/xoff${offsetbits}" 120 local D="build/${BM}/usize${usizebits}/xoff${offsetbits}"
119 local BMD="${BM}-${offsetbits}" 121 local BMD="${BM}-${usizebits}-${offsetbits}"
122
120 local FULLD="${SRCDIR}/${D}" 123 local FULLD="${SRCDIR}/${D}"
121 local CFLAGS="${march} ${cargs} -I${SRCDIR}/build/lib-${LIBBM}/include" 124 local CFLAGS="${march} ${cargs} -I${SRCDIR}/build/lib-${LIBBM}/include"
122 local CXXFLAGS="${march} ${cargs} -I${SRCDIR}/build/lib-${LIBBM}/include" 125 local CXXFLAGS="${march} ${cargs} -I${SRCDIR}/build/lib-${LIBBM}/include"
@@ -199,8 +202,9 @@ function buildall {
199 echo "" 202 echo ""
200 203
201 buildlzma "$1" "$2" 204 buildlzma "$1" "$2"
202 buildit "$1" "$2" 32 "-DXD3_USE_LARGEFILE64=0 $3" "$4" 205 buildit "$1" "$2" 32 32 "-DXD3_USE_LARGESIZET=0 -DXD3_USE_LARGEFILE64=0 $3" "$4"
203 buildit "$1" "$2" 64 "-DXD3_USE_LARGEFILE64=1 $3" "$4" 206 buildit "$1" "$2" 32 64 "-DXD3_USE_LARGESIZET=0 -DXD3_USE_LARGEFILE64=1 $3" "$4"
207 buildit "$1" "$2" 64 64 "-DXD3_USE_LARGESIZET=1 -DXD3_USE_LARGEFILE64=1 $3" "$4"
204} 208}
205 209
206setup 210setup
diff --git a/xdelta3/testing/Makefile b/xdelta3/testing/Makefile
index f859f94..d0b9c9e 100644
--- a/xdelta3/testing/Makefile
+++ b/xdelta3/testing/Makefile
@@ -3,3 +3,6 @@ all:
3 3
4xdelta3regtest: 4xdelta3regtest:
5 (cd .. && make xdelta3regtest) 5 (cd .. && make xdelta3regtest)
6
7xdelta3checksum:
8 (cd .. && make xdelta3checksum)
diff --git a/xdelta3/testing/checksum_test.cc b/xdelta3/testing/checksum_test.cc
new file mode 100644
index 0000000..9418d24
--- /dev/null
+++ b/xdelta3/testing/checksum_test.cc
@@ -0,0 +1,756 @@
1/* Copyright (C) 2007 Josh MacDonald */
2
3#include "test.h"
4#include <assert.h>
5#include <list>
6#include <vector>
7#include <algorithm>
8
9#include "../cpp-btree/btree_map.h"
10
11extern "C" {
12uint32_t xd3_large32_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
13uint32_t xd3_large32_cksum_update_old (xd3_hash_cfg *cfg, uint32_t cksum,
14 const uint8_t *base, const usize_t look);
15
16uint64_t xd3_large64_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
17uint64_t xd3_large64_cksum_update_old (xd3_hash_cfg *cfg, uint64_t cksum,
18 const uint8_t *base, const usize_t look);
19}
20
21using btree::btree_map;
22using std::list;
23using std::vector;
24
25// MLCG parameters
26// a, a*
27uint32_t good_32bit_values[] = {
28 1597334677U, // ...
29 741103597U, 887987685U,
30};
31
32// a, a*
33uint64_t good_64bit_values[] = {
34 1181783497276652981ULL, 4292484099903637661ULL,
35 7664345821815920749ULL, // ...
36};
37
// Prints the column-header row for the result table.  The header is
// emitted on the first call and then again on every 20th call after
// that, so it stays visible while results scroll by.
void print_header() {
  static int calls_so_far = 0;
  const bool header_due = (calls_so_far % 20 == 0);

  calls_so_far++;
  if (!header_due) {
    return;
  }

  printf("%-32sConf\t\tCount\tUniq\tFull\tCover\tColls"
         "\tMB/s\tIters\t#Colls\n", "Name");
}
45
46struct true_type { };
47struct false_type { };
48
49template <typename Word>
50usize_t bitsof();
51
52template<>
53usize_t bitsof<unsigned int>() {
54 return sizeof(unsigned int) * 8;
55}
56
57template<>
58usize_t bitsof<unsigned long>() {
59 return sizeof(unsigned long) * 8;
60}
61
62template<>
63usize_t bitsof<unsigned long long>() {
64 return sizeof(unsigned long long) * 8;
65}
66
// Reduces a checksum word "t" to a hash value: shift right by "s" bits
// so that the high bits remain, then XOR in the low bits selected by
// "mask".  The high bits are the most "distant" in terms of the
// spectral test for the rabin_karp MLCG; for short windows they are
// not enough on their own, hence the extra "mask" worth of low bits.
template <typename Word>
struct hhash {
  Word operator()(const Word t, const Word s, const Word mask) {
    const Word high_part = t >> s;
    const Word low_part = t & mask;
    return high_part ^ low_part;
  }
};
76
77template <typename Word>
78Word good_word();
79
80template<>
81uint32_t good_word<uint32_t>() {
82 return good_32bit_values[0];
83}
84
85template<>
86uint64_t good_word<uint64_t>() {
87 return good_64bit_values[0];
88}
89
90// CLASSES
91
// SELF is the template argument list and MEMBER the matching template
// header shared by every checksum class in this file.
#define SELF Word, CksumSize, CksumSkip, Hash, Compaction
#define MEMBER \
  template <typename Word, int CksumSize, int CksumSkip, \
            typename Hash, int Compaction>

// Republishes the template parameters of a checksum class as nested
// typedefs and constants so that generic code can read them back.
MEMBER
struct cksum_params {
  static const int cksum_size = CksumSize;
  static const int cksum_skip = CksumSkip;
  static const int compaction = Compaction;

  typedef Word word_type;
  typedef Hash hash_type;
};
108
109MEMBER
110struct rabin_karp : public cksum_params<SELF> {
111 // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ...
112 rabin_karp()
113 : powers(make_powers()),
114 product(powers[0] * good_word<Word>()),
115 incr_state(0) { }
116
117 static Word* make_powers() {
118 Word *p = new Word[CksumSize];
119 p[CksumSize - 1] = 1;
120 for (int i = CksumSize - 2; i >= 0; i--) {
121 p[i] = p[i + 1] * good_word<Word>();
122 }
123 return p;
124 }
125
126 ~rabin_karp() {
127 delete [] powers;
128 }
129
130 Word step(const uint8_t *ptr) {
131 Word h = 0;
132 for (int i = 0; i < CksumSize; i++) {
133 h += (ptr[i]) * powers[i];
134 }
135 return h;
136 }
137
138 Word state0(const uint8_t *ptr) {
139 incr_state = step(ptr);
140 return incr_state;
141 }
142
143 Word incr(const uint8_t *ptr) {
144 incr_state = good_word<Word>() * incr_state -
145 product * (ptr[-1]) + (ptr[CksumSize - 1]);
146 return incr_state;
147 }
148
149 const Word *const powers;
150 const Word product;
151 Word incr_state;
152};
153
154MEMBER
155struct with_stream : public cksum_params<SELF> {
156 xd3_stream stream;
157
158 with_stream()
159 {
160 xd3_config cfg;
161 memset (&stream, 0, sizeof (stream));
162 xd3_init_config (&cfg, 0);
163 cfg.smatch_cfg = XD3_SMATCH_SOFT;
164 cfg.smatcher_soft.large_look = CksumSize;
165 cfg.smatcher_soft.large_step = CksumSkip;
166 cfg.smatcher_soft.small_look = 4;
167 cfg.smatcher_soft.small_chain = 4;
168 cfg.smatcher_soft.small_lchain = 4;
169 cfg.smatcher_soft.max_lazy = 4;
170 cfg.smatcher_soft.long_enough = 4;
171 CHECK_EQ(0, xd3_config_stream (&stream, &cfg));
172
173 CHECK_EQ(0, xd3_size_hashtable (&stream,
174 1<<10 /* ignored */,
175 stream.smatcher.large_look,
176 & stream.large_hash));
177 }
178 ~with_stream()
179 {
180 xd3_free_stream (&stream);
181 }
182};
183
184MEMBER
185struct large_cksum : public with_stream<SELF> {
186 Word step(const uint8_t *ptr) {
187 return xd3_large_cksum (&this->stream.large_hash, ptr, CksumSize);
188 }
189
190 Word state0(const uint8_t *ptr) {
191 incr_state = step(ptr);
192 return incr_state;
193 }
194
195 Word incr(const uint8_t *ptr) {
196 incr_state = xd3_large_cksum_update (&this->stream.large_hash,
197 incr_state, ptr - 1, CksumSize);
198 return incr_state;
199 }
200
201 Word incr_state;
202};
203
204#if SIZEOF_USIZE_T == 4
205#define xd3_large_cksum_old xd3_large32_cksum_old
206#define xd3_large_cksum_update_old xd3_large32_cksum_update_old
207#elif SIZEOF_USIZE_T == 8
208#define xd3_large_cksum_old xd3_large64_cksum_old
209#define xd3_large_cksum_update_old xd3_large64_cksum_update_old
210#endif
211
212MEMBER
213struct large_cksum_old : public with_stream<SELF> {
214 Word step(const uint8_t *ptr) {
215 return xd3_large_cksum_old (&this->stream.large_hash, ptr, CksumSize);
216 }
217
218 Word state0(const uint8_t *ptr) {
219 incr_state = step(ptr);
220 return incr_state;
221 }
222
223 Word incr(const uint8_t *ptr) {
224 incr_state = xd3_large_cksum_update_old (&this->stream.large_hash,
225 incr_state, ptr - 1, CksumSize);
226 return incr_state;
227 }
228
229 Word incr_state;
230};
231
232// TESTS
233
234template <typename Word>
235struct file_stats {
236 typedef const uint8_t* ptr_type;
237 typedef Word word_type;
238 typedef btree::btree_multimap<word_type, ptr_type> table_type;
239 typedef typename table_type::iterator table_iterator;
240
241 usize_t cksum_size;
242 usize_t cksum_skip;
243 usize_t unique;
244 usize_t unique_values;
245 usize_t count;
246 table_type table;
247
248 file_stats(usize_t size, usize_t skip)
249 : cksum_size(size),
250 cksum_skip(skip),
251 unique(0),
252 unique_values(0),
253 count(0) {
254 }
255
256 void reset() {
257 unique = 0;
258 unique_values = 0;
259 count = 0;
260 table.clear();
261 }
262
263 void update(word_type word, ptr_type ptr) {
264 table_iterator t_i = table.find(word);
265
266 count++;
267 if (t_i != table.end()) {
268 int collisions = 0;
269 for (table_iterator p_i = t_i;
270 p_i != table.end() && p_i->first == word;
271 ++p_i) {
272 if (memcmp(p_i->second, ptr, cksum_size) == 0) {
273 return;
274 }
275 collisions++;
276 }
277 if (collisions >= 1000) {
278 fprintf(stderr, "Something is not right, lots of collisions=%d\n",
279 collisions);
280 abort();
281 }
282 } else {
283 unique_values++;
284 }
285 unique++;
286 table.insert(std::make_pair(word, ptr));
287 return;
288 }
289
290 void freeze() {
291 table.clear();
292 }
293};
294
295struct test_result_base;
296
297static vector<test_result_base*> all_tests;
298
299struct test_result_base {
300 virtual ~test_result_base() {
301 }
302 virtual void reset() = 0;
303 virtual void print() = 0;
304 virtual void get(const uint8_t* buf, const size_t buf_size,
305 usize_t iters) = 0;
306 virtual void stat() = 0;
307 virtual usize_t count() = 0;
308 virtual usize_t dups() = 0;
309 virtual double uniqueness() = 0;
310 virtual double fullness() = 0;
311 virtual double collisions() = 0;
312 virtual double coverage() = 0;
313 virtual double compression() = 0;
314 virtual double time() = 0;
315 virtual double total_time() = 0;
316 virtual usize_t total_count() = 0;
317 virtual usize_t total_dups() = 0;
318};
319
320template <typename Checksum>
321struct test_result : public test_result_base {
322 Checksum cksum;
323 const char *test_name;
324 file_stats<typename Checksum::word_type> fstats;
325 usize_t test_size;
326 usize_t n_steps;
327 usize_t n_incrs;
328 typename Checksum::word_type s_bits;
329 typename Checksum::word_type s_mask;
330 usize_t t_entries;
331 usize_t h_bits;
332 usize_t h_buckets_full;
333 char *hash_table;
334 long accum_millis;
335 usize_t accum_iters;
336
337 // These are not reset
338 double accum_time;
339 usize_t accum_count;
340 usize_t accum_dups;
341 usize_t accum_colls;
342 size_t accum_size;
343
344 test_result(const char *name)
345 : test_name(name),
346 fstats(Checksum::cksum_size, Checksum::cksum_skip),
347 hash_table(NULL),
348 accum_millis(0),
349 accum_iters(0),
350 accum_time(0.0),
351 accum_count(0),
352 accum_dups(0),
353 accum_colls(0),
354 accum_size(0) {
355 all_tests.push_back(this);
356 }
357
358 ~test_result() {
359 reset();
360 }
361
362 void reset() {
363 // size of file
364 test_size = 0;
365
366 // count
367 n_steps = 0;
368 n_incrs = 0;
369
370 // four values used by new_table()/summarize_table()
371 s_bits = 0;
372 s_mask = 0;
373 t_entries = 0;
374 h_bits = 0;
375 h_buckets_full = 0;
376
377 accum_millis = 0;
378 accum_iters = 0;
379
380 fstats.reset();
381
382 // temporary
383 if (hash_table) {
384 delete(hash_table);
385 hash_table = NULL;
386 }
387 }
388
389 usize_t count() {
390 if (Checksum::cksum_skip == 1) {
391 return n_incrs;
392 } else {
393 return n_steps;
394 }
395 }
396
397 usize_t dups() {
398 return fstats.count - fstats.unique;
399 }
400
401 /* Fraction of distinct strings of length cksum_size which are not
402 * represented in the hash table. */
403 double collisions() {
404 return (fstats.unique - fstats.unique_values) / (double) fstats.unique;
405 }
406 usize_t colls() {
407 return (fstats.unique - fstats.unique_values);
408 }
409
410 double uniqueness() {
411 return 1.0 - (double) dups() / count();
412 }
413
414 double fullness() {
415 return (double) h_buckets_full / (1 << h_bits);
416 }
417
418 double coverage() {
419 return (double) h_buckets_full / uniqueness() / count();
420 }
421
422 double compression() {
423 return 1.0 - coverage();
424 }
425
426 double time() {
427 return (double) accum_millis / accum_iters;
428 }
429
430 double total_time() {
431 return accum_time;
432 }
433
434 usize_t total_count() {
435 return accum_count;
436 }
437
438 usize_t total_dups() {
439 return accum_dups;
440 }
441
442 usize_t total_colls() {
443 return accum_dups;
444 }
445
446 void stat() {
447 accum_time += time();
448 accum_count += count();
449 accum_dups += dups();
450 accum_colls += colls();
451 accum_size += test_size;
452 }
453
454 void print() {
455 if (fstats.count != count()) {
456 fprintf(stderr, "internal error: %" W "d != %" W "d\n", fstats.count, count());
457 abort();
458 }
459 print_header();
460 printf("%-32s%d/%d 2^%" W "u\t%" W "u\t%0.4f\t%.4f\t%.4f\t%.1e\t%.2f\t"
461 "%" W "u\t%" W "u\n",
462 test_name,
463 Checksum::cksum_size,
464 Checksum::cksum_skip,
465 h_bits,
466 count(),
467 uniqueness(),
468 fullness(),
469 coverage(),
470 collisions(),
471 0.001 * accum_iters * test_size / accum_millis,
472 accum_iters,
473 colls());
474 }
475
476 usize_t size_log2 (usize_t slots) {
477 usize_t bits = bitsof<typename Checksum::word_type>() - 1;
478 usize_t i;
479
480 for (i = 3; i <= bits; i += 1) {
481 if (slots <= (1U << i)) {
482 return i - Checksum::compaction;
483 }
484 }
485
486 return bits;
487 }
488
489 void new_table(usize_t entries) {
490 t_entries = entries;
491 h_bits = size_log2(entries);
492
493 usize_t n = 1 << h_bits;
494
495 s_bits = bitsof<typename Checksum::word_type>() - h_bits;
496 s_mask = n - 1U;
497
498 hash_table = new char[n / 8];
499 memset(hash_table, 0, n / 8);
500 }
501
502 int get_table_bit(usize_t i) {
503 return hash_table[i/8] & (1 << i%8);
504 }
505
506 int set_table_bit(usize_t i) {
507 return hash_table[i/8] |= (1 << i%8);
508 }
509
510 void summarize_table() {
511 usize_t n = 1 << h_bits;
512 usize_t f = 0;
513 for (usize_t i = 0; i < n; i++) {
514 if (get_table_bit(i)) {
515 f++;
516 }
517 }
518 h_buckets_full = f;
519 }
520
521 void get(const uint8_t* buf, const size_t buf_size, usize_t test_iters) {
522 typename Checksum::hash_type hash;
523 const uint8_t *ptr;
524 const uint8_t *end;
525 usize_t periods;
526 int64_t last_offset;
527 int64_t stop;
528
529 test_size = buf_size;
530 last_offset = buf_size - Checksum::cksum_size;
531
532 if (last_offset < 0) {
533 periods = 0;
534 n_steps = 0;
535 n_incrs = 0;
536 stop = -Checksum::cksum_size;
537 } else {
538 periods = last_offset / Checksum::cksum_skip;
539 n_steps = periods + 1;
540 n_incrs = last_offset + 1;
541 stop = last_offset - (periods + 1) * Checksum::cksum_skip;
542 }
543
544 // Compute file stats once.
545 if (fstats.unique_values == 0) {
546 if (Checksum::cksum_skip == 1) {
547 for (size_t i = 0; i <= buf_size - Checksum::cksum_size; i++) {
548 fstats.update(hash(cksum.step(buf + i), s_bits, s_mask), buf + i);
549 }
550 } else {
551 ptr = buf + last_offset;
552 end = buf + stop;
553
554 for (; ptr != end; ptr -= Checksum::cksum_skip) {
555 fstats.update(hash(cksum.step(ptr), s_bits, s_mask), ptr);
556 }
557 }
558 fstats.freeze();
559 }
560
561 long start_test = get_millisecs_now();
562
563 if (Checksum::cksum_skip != 1) {
564 new_table(n_steps);
565
566 for (usize_t i = 0; i < test_iters; i++) {
567 ptr = buf + last_offset;
568 end = buf + stop;
569
570 for (; ptr != end; ptr -= Checksum::cksum_skip) {
571 set_table_bit(hash(cksum.step(ptr), s_bits, s_mask));
572 }
573 }
574
575 summarize_table();
576 }
577
578 stop = buf_size - Checksum::cksum_size + 1;
579 if (stop < 0) {
580 stop = 0;
581 }
582
583 if (Checksum::cksum_skip == 1) {
584 new_table(n_incrs);
585
586 for (usize_t i = 0; i < test_iters; i++) {
587 ptr = buf;
588 end = buf + stop;
589
590 if (ptr != end) {
591 set_table_bit(hash(cksum.state0(ptr++), s_bits, s_mask));
592 }
593
594 for (; ptr != end; ptr++) {
595 typename Checksum::word_type w = cksum.incr(ptr);
596 CHECK_EQ(w, cksum.step(ptr));
597 set_table_bit(hash(w, s_bits, s_mask));
598 }
599 }
600
601 summarize_table();
602 }
603
604 accum_iters += test_iters;
605 accum_millis += get_millisecs_now() - start_test;
606 }
607};
608
609static int read_whole_file(const char *name,
610 uint8_t **buf_ptr,
611 size_t *buf_len) {
612 main_file file;
613 int ret;
614 xoff_t len;
615 size_t nread;
616 main_file_init(&file);
617 file.filename = name;
618 ret = main_file_open(&file, name, XO_READ);
619 if (ret != 0) {
620 fprintf(stderr, "open failed\n");
621 goto exit;
622 }
623 ret = main_file_stat(&file, &len);
624 if (ret != 0) {
625 fprintf(stderr, "stat failed\n");
626 goto exit;
627 }
628
629 (*buf_len) = (size_t)len;
630 (*buf_ptr) = (uint8_t*) main_malloc(*buf_len);
631 ret = main_file_read(&file, *buf_ptr, *buf_len, &nread,
632 "read failed");
633 if (ret == 0 && *buf_len == nread) {
634 ret = 0;
635 } else {
636 fprintf(stderr, "invalid read\n");
637 ret = XD3_INTERNAL;
638 }
639 exit:
640 main_file_cleanup(&file);
641 return ret;
642}
643
644int main(int argc, char** argv) {
645 int i;
646 uint8_t *buf = NULL;
647 size_t buf_len = 0;
648 int ret;
649
650 if (argc <= 1) {
651 fprintf(stderr, "usage: %s file ...\n", argv[0]);
652 return 1;
653 }
654
655// TODO: The xdelta3-hash.h code is identical now; add sameness test.
656// using rabin_karp<> template.
657#define TEST(T,Z,S,C) \
658 test_result<large_cksum<T,Z,S,hhash<T>,C>> \
659 _xck_ ## T ## _ ## Z ## _ ## S ## _ ## C \
660 ("xck_" #T "_" #Z "_" #S "_" #C); \
661 test_result<large_cksum_old<T,Z,S,hhash<T>,C>> \
662 _old_ ## T ## _ ## Z ## _ ## S ## _ ## C \
663 ("old_" #T "_" #Z "_" #S "_" #C)
664
665#define TESTS(SIZE, SKIP) \
666 TEST(usize_t, SIZE, SKIP, 1); \
667 TEST(usize_t, SIZE, SKIP, 2)
668
669 TESTS(5, 1);
670 TESTS(6, 1);
671 TESTS(7, 1);
672 TESTS(8, 1);
673 TESTS(9, 1);
674 TESTS(10, 1);
675 TESTS(11, 1);
676 TESTS(12, 1);
677 TESTS(13, 1);
678 TESTS(14, 1);
679 TESTS(15, 1);
680 TESTS(16, 1);
681 TESTS(17, 1);
682 TESTS(18, 1);
683 TESTS(19, 1);
684 TESTS(20, 1);
685 TESTS(21, 1);
686 TESTS(22, 1);
687 TESTS(23, 1);
688 TESTS(24, 1);
689 TESTS(25, 1);
690 TESTS(26, 1);
691 TESTS(27, 1);
692 TESTS(28, 1);
693 TESTS(29, 1);
694 TESTS(30, 1);
695 TESTS(31, 1);
696 TESTS(32, 1);
697 TESTS(33, 1);
698 TESTS(34, 1);
699 TESTS(35, 1);
700 TESTS(36, 1);
701 TESTS(37, 1);
702 TESTS(38, 1);
703 TESTS(39, 1);
704
705
706 for (i = 1; i < argc; i++) {
707 if ((ret = read_whole_file(argv[i],
708 & buf,
709 & buf_len))) {
710 return 1;
711 }
712
713 fprintf(stderr, "file %s is %zu bytes\n",
714 argv[i], buf_len);
715
716 double min_time = -1.0;
717 double min_compression = 0.0;
718
719 for (vector<test_result_base*>::iterator iter = all_tests.begin();
720 iter != all_tests.end(); ++iter) {
721 test_result_base *test = *iter;
722 test->reset();
723
724 usize_t iters = 1;
725 long start_test = get_millisecs_now();
726
727 do {
728 test->get(buf, buf_len, iters);
729 iters *= 3;
730 iters /= 2;
731 } while (get_millisecs_now() - start_test < 2000);
732
733 test->stat();
734
735 if (min_time < 0.0) {
736 min_compression = test->compression();
737 min_time = test->time();
738 }
739
740 if (min_time > test->time()) {
741 min_time = test->time();
742 }
743
744 if (min_compression > test->compression()) {
745 min_compression = test->compression();
746 }
747
748 test->print();
749 }
750
751 main_free(buf);
752 buf = NULL;
753 }
754
755 return 0;
756}
diff --git a/xdelta3/testing/checksum_test_c.c b/xdelta3/testing/checksum_test_c.c
new file mode 100644
index 0000000..8f0507a
--- /dev/null
+++ b/xdelta3/testing/checksum_test_c.c
@@ -0,0 +1,174 @@
1#include "../xdelta3.c"
2
3// OLD CHECKSUM CODE
4
5#define PERMUTE32(x) (__single_hash32[x])
6#define PERMUTE64(x) (__single_hash64[x])
7
8const uint16_t __single_hash32[256] =
9{
10 /* This hashes the input alphabet (Scheme SLIB pseudo-random). */
11 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
12 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
13 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
14 0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
15 0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
16 0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
17 0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
18 0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
19 0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
20 0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
21 0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
22 0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
23 0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
24 0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
25 0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
26 0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
27 0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
28 0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
29 0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
30 0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
31 0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
32 0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
33 0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
34 0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
35 0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
36 0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
37 0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
38 0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
39 0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
40 0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
41 0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
42 0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
43};
44
45const uint32_t __single_hash64[256] =
46{
47 /* http://random.org 2014.10.24 */
48 0xd25e9f0a, 0xb1af9d5e, 0xb753dfa2, 0x157050f7, /* 0 */
49 0xc84b072c, 0xdd14fe7c, 0xf92208c3, 0xdf08a0c0,
50 0x63a5c118, 0x76f5d90f, 0xa2f8b93e, 0xb6c12d22,
51 0xaf074957, 0x966fb7d9, 0x62f7b785, 0xb40e8a09,
52 0x0a811d5d, 0x323a6daa, 0xb62f7c5b, 0xfdcb9a53,
53 0xf25a9067, 0x4506bc7a, 0xff58a74b, 0x5ae62817,
54 0x74097675, 0x722c0fd9, 0x116a2a66, 0x65f76728,
55 0x72c79651, 0xe043cf9d, 0x64b867c7, 0x6604834f,
56 0xcdca58a6, 0x0f164e2d, 0x24515f05, 0x632cdbf8,
57 0x18091d4a, 0x3eff4128, 0x673d1c33, 0xd8e10c71,
58 0x1a3edf11, 0xba52892f, 0xa56949e0, 0xf3e1dd77, /* 10 */
59 0x86fcbe3e, 0x138d66d0, 0x4fc98359, 0xc22e5dd6,
60 0xc59f2267, 0x6c6dd739, 0xe03da190, 0x07e8469c,
61 0xadcfb02c, 0x00d3b0d9, 0xa1f44918, 0x8bd84d87,
62 0x08ec9ec1, 0xbbcd156f, 0xb57718e3, 0x3177e752,
63 0xf52a4d70, 0xde7aaad9, 0x075f1da0, 0x21ba00c6,
64 0xb9469a5c, 0xcf08d5ba, 0x91ac9edc, 0xc6167b63,
65 0xc1974919, 0xc8c8d195, 0x4b1996dd, 0xeff8991c,
66 0xf7f66c6b, 0x25b012e2, 0x59d12a98, 0xea40d3cc,
67 0x41f9970b, 0xec48101a, 0xa3bdcf90, 0x99f16905,
68 0x27af6c97, 0xc849af37, 0x49cad89b, 0xf48c2278, /* 20 */
69 0x5529c3d8, 0x9e7d6dce, 0x16feb52d, 0xf1b0aca1,
70 0xaf28fccb, 0x48e4ce3c, 0xc4436617, 0x64524e3e,
71 0x61806681, 0x6384f2d7, 0x1172880f, 0x34a5ef5f,
72 0xcc8cc0a8, 0x66e8f100, 0x2866085f, 0xba9b1b2d,
73 0x51285949, 0x2be4b574, 0x889b1ef5, 0x3dbe920d,
74 0x9277a62f, 0x0584a9f6, 0x085d8fc4, 0x4b5d403d,
75 0x4e46ca78, 0x3294c2f9, 0x29313e70, 0xe4f09b24,
76 0xe73b331c, 0x072f5552, 0x2e390b78, 0xea0021ca,
77 0xd8f40320, 0xed0e16fd, 0x7de9cf7a, 0xf17e3d6c,
78 0x8df1bd85, 0x052cae67, 0x3486e512, 0x3a1c09b8, /* 30 */
79 0x6c2a7b4e, 0x83455753, 0xbc0353ac, 0x0ffe20b6,
80 0x5fdcef85, 0x010f506c, 0x595ce972, 0xe28680d0,
81 0xa7e216b2, 0xa392ee0f, 0x25b73faa, 0x2b1f4983,
82 0xeeaefe98, 0x1d3d9cbc, 0x6aebe97b, 0x8b7b3584,
83 0x9e6a9a07, 0xd37f1e99, 0x4ac2a441, 0x8ae9a213,
84 0x7d0e27d7, 0x5de54b9a, 0x8621de1f, 0xf0f2f866,
85 0xcb08d275, 0x49c3f87e, 0xd5ee68c1, 0x9802fc77,
86 0x68be6c5e, 0x65aa8c27, 0xf423d5f7, 0x10ec5502,
87 0x9909bce1, 0x509cdf1b, 0x338fea72, 0x2733e9bf,
88 0xf92f4fd7, 0x87738ea2, 0x931a8bbc, 0x0a5c9155, /* 40 */
89 0xbe5edd9b, 0xadbf5838, 0x0338f8d2, 0x290da210,
90 0x390c37d8, 0xe7cffae8, 0x20617ebe, 0x464322dd,
91 0x7b3c4e78, 0xac142dcb, 0x2d5cef76, 0xd8fe49fc,
92 0x60f4e9a9, 0x7473816f, 0x0dc35f39, 0x5eed80c1,
93 0x0cb55ab6, 0x1d3ac541, 0x13c7f529, 0x7bffdf4a,
94 0xe334785b, 0x85263ec1, 0xd132ae56, 0x7c868b9e,
95 0x47f60638, 0x1012b979, 0x81c31dd3, 0x1af868c8,
96 0x0c5d0742, 0xd1b3e1a2, 0x5873200a, 0xf848465c,
97 0x0fc4d596, 0x609c18af, 0xc9f5a480, 0xd1a94a84,
98 0xa1431a3f, 0x7de8bb1a, 0x25f1256b, 0x1dcc732c, /* 50 */
99 0x6aa1549a, 0xa2367281, 0x32f2a77e, 0x82e62a0f,
100 0x045cbb56, 0x74b2027c, 0xd71a32d9, 0x022e7cb5,
101 0xe99be177, 0x60222fdf, 0xd69681ca, 0x9008ee2c,
102 0x32923db4, 0xcf82bf97, 0x38960a5b, 0xb3503d5b,
103 0x9bd4c7f2, 0x33c029c8, 0x1ef504a3, 0xdb249d3b,
104 0x91e89676, 0x4ca43b36, 0x9191433c, 0x465d5dc4,
105 0xf4dcb118, 0x9d11dd00, 0xb592f058, 0xdbe5ce30,
106 0x74790d92, 0x779850a8, 0x7180d25b, 0xfa951d99,
107 0x5990935a, 0x921cb022, 0x3b7c39bc, 0x6a38a7c7,
108 0xdc22703b, 0x142bab3b, 0x4e3d9479, 0x44bb8482, /* 60 */
109 0x8043abce, 0xfebe832a, 0x8e6a2f98, 0x4d43c4fe,
110 0xd192a70a, 0x802f3c3a, 0x5d11bbab, 0x2665d241,
111 0xb3f3a680, 0x3a8d223f, 0xcf82cdb4, 0x4ed28743,
112};
113
114uint64_t
115xd3_large64_cksum_old (xd3_hash_cfg *ignore, const uint8_t *base, const usize_t look)
116{
117 static const uint64_t kBits = 32;
118 static const uint64_t kMask = 0xffffffff;
119 usize_t i = 0;
120 uint64_t low = 0;
121 uint64_t high = 0;
122
123 for (; i < look; i += 1)
124 {
125 low += PERMUTE64(*base++);
126 high += low;
127 }
128
129 return ((high & kMask) << kBits) | (low & kMask);
130}
131
132uint64_t
133xd3_large64_cksum_update_old (xd3_hash_cfg *ignore, const uint64_t cksum,
134 const uint8_t *base, const usize_t look)
135{
136 static const uint64_t kBits = 32;
137 static const uint64_t kMask = 0xffffffff;
138 uint64_t old_c = PERMUTE64(base[0]);
139 uint64_t new_c = PERMUTE64(base[look]);
140 uint64_t low = ((cksum & kMask) - old_c + new_c) & kMask;
141 uint64_t high = ((cksum >> kBits) - (old_c * look) + low) & kMask;
142 return (high << kBits) | low;
143}
144
145uint32_t
146xd3_large32_cksum_old (xd3_hash_cfg *ignore, const uint8_t *base, const usize_t look)
147{
148 static const uint32_t kBits = 16;
149 static const uint32_t kMask = 0xffff;
150 usize_t i = 0;
151 uint32_t low = 0;
152 uint32_t high = 0;
153
154 for (; i < look; i += 1)
155 {
156 low += PERMUTE32(*base++);
157 high += low;
158 }
159
160 return ((high & kMask) << kBits) | (low & kMask);
161}
162
163uint32_t
164xd3_large32_cksum_update_old (xd3_hash_cfg *ignore, const uint32_t cksum,
165 const uint8_t *base, const usize_t look)
166{
167 static const uint32_t kBits = 16;
168 static const uint32_t kMask = 0xffff;
169 uint32_t old_c = PERMUTE32(base[0]);
170 uint32_t new_c = PERMUTE32(base[look]);
171 uint32_t low = ((cksum & kMask) - old_c + new_c) & kMask;
172 uint32_t high = ((cksum >> kBits) - (old_c * look) + low) & kMask;
173 return (high << kBits) | low;
174}
diff --git a/xdelta3/testing/delta.h b/xdelta3/testing/delta.h
index b0cca7c..f874bf2 100644
--- a/xdelta3/testing/delta.h
+++ b/xdelta3/testing/delta.h
@@ -53,13 +53,13 @@ public:
53 xd3_winst &winst = stream_.whole_target.inst[i]; 53 xd3_winst &winst = stream_.whole_target.inst[i];
54 switch (winst.type) { 54 switch (winst.type) {
55 case XD3_RUN: 55 case XD3_RUN:
56 DP(RINT "%"Q"u run %u\n", winst.position, winst.size); 56 DP(RINT "%"Q" run %u\n", winst.position, winst.size);
57 break; 57 break;
58 case XD3_ADD: 58 case XD3_ADD:
59 DP(RINT "%"Q"u add %u\n", winst.position, winst.size); 59 DP(RINT "%"Q" add %u\n", winst.position, winst.size);
60 break; 60 break;
61 default: 61 default:
62 DP(RINT "%"Q"u copy %u @ %"Q"u (mode %u)\n", 62 DP(RINT "%"Q" copy %u @ %"Q" (mode %u)\n",
63 winst.position, winst.size, winst.addr, winst.mode); 63 winst.position, winst.size, winst.addr, winst.mode);
64 break; 64 break;
65 } 65 }
diff --git a/xdelta3/testing/regtest.cc b/xdelta3/testing/regtest.cc
index b2cdaa5..e83af4b 100644
--- a/xdelta3/testing/regtest.cc
+++ b/xdelta3/testing/regtest.cc
@@ -102,8 +102,8 @@ public:
102 102
103 xoff_t blks = target_iterator.Blocks(); 103 xoff_t blks = target_iterator.Blocks();
104 104
105 IF_DEBUG2(XPR(NTR "target in %s: %"Q"u..%"Q"u %"Q"u(%"Q"u) " 105 IF_DEBUG2(XPR(NTR "target in %s: %"Q"..%"Q" %"Q"(%"Q") "
106 "verified %"Q"u\n", 106 "verified %"Q"\n",
107 encoding ? "encoding" : "decoding", 107 encoding ? "encoding" : "decoding",
108 target_iterator.Offset(), 108 target_iterator.Offset(),
109 target_iterator.Offset() + target_block.Size(), 109 target_iterator.Offset() + target_block.Size(),
diff --git a/xdelta3/testing/run_release.sh b/xdelta3/testing/run_release.sh
new file mode 100755
index 0000000..85ed1f7
--- /dev/null
+++ b/xdelta3/testing/run_release.sh
@@ -0,0 +1,2 @@
1#!/bin/sh
2(cd .. && ./run_release.sh)
diff --git a/xdelta3/testing/test.h b/xdelta3/testing/test.h
index 1eabcbd..7de24fb 100644
--- a/xdelta3/testing/test.h
+++ b/xdelta3/testing/test.h
@@ -18,8 +18,8 @@ extern "C" {
18 18
19#define CHECK_OP(x,y,OP) \ 19#define CHECK_OP(x,y,OP) \
20 do { \ 20 do { \
21 typeof(x) _x(x); \ 21 __typeof__(x) _x(x); \
22 typeof(x) _y(y); \ 22 __typeof__(x) _y(y); \
23 if (!(_x OP _y)) { \ 23 if (!(_x OP _y)) { \
24 cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x " " #OP " " #y << endl; \ 24 cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x " " #OP " " #y << endl; \
25 cerr << __FILE__ << ":" << __LINE__ << " {0} " << _x << endl; \ 25 cerr << __FILE__ << ":" << __LINE__ << " {0} " << _x << endl; \
@@ -68,5 +68,3 @@ pair<T, U> make_pair(const T& t, const U& u) {
68 68
69using std::min; 69using std::min;
70using std::max; 70using std::max;
71
72
diff --git a/xdelta3/xdelta3-blkcache.h b/xdelta3/xdelta3-blkcache.h
index f8fa8e2..20a2a4a 100644
--- a/xdelta3/xdelta3-blkcache.h
+++ b/xdelta3/xdelta3-blkcache.h
@@ -26,6 +26,9 @@
26typedef struct _main_blklru main_blklru; 26typedef struct _main_blklru main_blklru;
27typedef struct _main_blklru_list main_blklru_list; 27typedef struct _main_blklru_list main_blklru_list;
28 28
29
30#define XD3_INVALID_OFFSET XOFF_T_MAX
31
29struct _main_blklru_list 32struct _main_blklru_list
30{ 33{
31 main_blklru_list *next; 34 main_blklru_list *next;
@@ -125,7 +128,7 @@ main_set_source (xd3_stream *stream, xd3_cmd cmd,
125 /* Note: The API requires a power-of-two blocksize and srcwinsz 128 /* Note: The API requires a power-of-two blocksize and srcwinsz
126 * (-B). The logic here will use a single block if the entire file 129 * (-B). The logic here will use a single block if the entire file
127 * is known to fit into srcwinsz. */ 130 * is known to fit into srcwinsz. */
128 option_srcwinsz = xd3_pow2_roundup (option_srcwinsz); 131 option_srcwinsz = xd3_xoff_roundup (option_srcwinsz);
129 132
130 /* Though called "lru", it is not LRU-specific. We always allocate 133 /* Though called "lru", it is not LRU-specific. We always allocate
131 * a maximum number of source block buffers. If the entire file 134 * a maximum number of source block buffers. If the entire file
@@ -153,7 +156,7 @@ main_set_source (xd3_stream *stream, xd3_cmd cmd,
153 * is the point at which external decompression may begin. Set the 156 * is the point at which external decompression may begin. Set the
154 * system for a single block. */ 157 * system for a single block. */
155 lru_size = 1; 158 lru_size = 1;
156 lru[0].blkno = (xoff_t) -1; 159 lru[0].blkno = XD3_INVALID_OFFSET;
157 blksize = option_srcwinsz; 160 blksize = option_srcwinsz;
158 main_blklru_list_push_back (& lru_list, & lru[0]); 161 main_blklru_list_push_back (& lru_list, & lru[0]);
159 XD3_ASSERT (blksize != 0); 162 XD3_ASSERT (blksize != 0);
@@ -162,7 +165,7 @@ main_set_source (xd3_stream *stream, xd3_cmd cmd,
162 source->blksize = blksize; 165 source->blksize = blksize;
163 source->name = sfile->filename; 166 source->name = sfile->filename;
164 source->ioh = sfile; 167 source->ioh = sfile;
165 source->curblkno = (xoff_t) -1; 168 source->curblkno = XD3_INVALID_OFFSET;
166 source->curblk = NULL; 169 source->curblk = NULL;
167 source->max_winsize = option_srcwinsz; 170 source->max_winsize = option_srcwinsz;
168 171
@@ -258,7 +261,7 @@ main_set_source (xd3_stream *stream, xd3_cmd cmd,
258 261
259 if (option_verbose > 1) 262 if (option_verbose > 1)
260 { 263 {
261 short_sprintf (nbufs, " #bufs %u", lru_size); 264 short_sprintf (nbufs, " #bufs %"W"u", lru_size);
262 } 265 }
263 266
264 XPR(NT "source %s %s blksize %s window %s%s%s\n", 267 XPR(NT "source %s %s blksize %s window %s%s%s\n",
@@ -293,7 +296,7 @@ main_getblk_lru (xd3_source *source, xoff_t blkno,
293 return 0; 296 return 0;
294 } 297 }
295 /* No going backwards in a sequential scan. */ 298 /* No going backwards in a sequential scan. */
296 if (blru->blkno != (xoff_t) -1 && blru->blkno > blkno) 299 if (blru->blkno != XD3_INVALID_OFFSET && blru->blkno > blkno)
297 { 300 {
298 return XD3_TOOFARBACK; 301 return XD3_TOOFARBACK;
299 } 302 }
@@ -333,7 +336,7 @@ main_getblk_lru (xd3_source *source, xoff_t blkno,
333 lru_filled += 1; 336 lru_filled += 1;
334 (*is_new) = 1; 337 (*is_new) = 1;
335 (*blrup) = blru; 338 (*blrup) = blru;
336 blru->blkno = -1; 339 blru->blkno = XD3_INVALID_OFFSET;
337 return 0; 340 return 0;
338} 341}
339 342
@@ -443,7 +446,7 @@ main_read_seek_source (xd3_stream *stream,
443 sfile->source_position += nread; 446 sfile->source_position += nread;
444 blru->size = nread; 447 blru->size = nread;
445 448
446 IF_DEBUG1 (DP(RINT "[getblk] skip blkno %"Q"u size %u\n", 449 IF_DEBUG1 (DP(RINT "[getblk] skip blkno %"Q" size %u\n",
447 skip_blkno, blru->size)); 450 skip_blkno, blru->size));
448 451
449 XD3_ASSERT (sfile->source_position <= pos); 452 XD3_ASSERT (sfile->source_position <= pos);
@@ -524,7 +527,7 @@ main_getblk_func (xd3_stream *stream,
524 527
525 if (option_verbose > 3) 528 if (option_verbose > 3)
526 { 529 {
527 if (blru->blkno != (xoff_t)-1) 530 if (blru->blkno != XD3_INVALID_OFFSET)
528 { 531 {
529 if (blru->blkno != blkno) 532 if (blru->blkno != blkno)
530 { 533 {
diff --git a/xdelta3/xdelta3-decode.h b/xdelta3/xdelta3-decode.h
index 65877a4..e71b624 100644
--- a/xdelta3/xdelta3-decode.h
+++ b/xdelta3/xdelta3-decode.h
@@ -288,13 +288,14 @@ xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst)
288 { 288 {
289 IF_DEBUG2 ({ 289 IF_DEBUG2 ({
290 static int cnt = 0; 290 static int cnt = 0;
291 XPR(NT "DECODE:%u: COPY at %"Q"u (winoffset %u) size %u winaddr %u\n", 291 XPR(NT "DECODE:%u: COPY at %"Q" (winoffset %"Z") "
292 cnt++, 292 "size %"Z" winaddr %"Z"\n",
293 stream->total_out + (stream->dec_position - 293 cnt++,
294 stream->dec_cpylen), 294 stream->total_out + (stream->dec_position -
295 (stream->dec_position - stream->dec_cpylen), 295 stream->dec_cpylen),
296 inst->size, 296 (stream->dec_position - stream->dec_cpylen),
297 inst->addr); 297 inst->size,
298 inst->addr);
298 }); 299 });
299 300
300 if ((ret = xd3_decode_address (stream, 301 if ((ret = xd3_decode_address (stream,
@@ -329,7 +330,7 @@ xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst)
329 if (inst->type == XD3_ADD) 330 if (inst->type == XD3_ADD)
330 { 331 {
331 static int cnt; 332 static int cnt;
332 XPR(NT "DECODE:%d: ADD at %"Q"u (winoffset %u) size %u\n", 333 XPR(NT "DECODE:%d: ADD at %"Q" (winoffset %"Z") size %"Z"\n",
333 cnt++, 334 cnt++,
334 (stream->total_out + stream->dec_position - stream->dec_cpylen), 335 (stream->total_out + stream->dec_position - stream->dec_cpylen),
335 stream->dec_position - stream->dec_cpylen, 336 stream->dec_position - stream->dec_cpylen,
@@ -339,7 +340,7 @@ xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst)
339 { 340 {
340 static int cnt; 341 static int cnt;
341 XD3_ASSERT (inst->type == XD3_RUN); 342 XD3_ASSERT (inst->type == XD3_RUN);
342 XPR(NT "DECODE:%d: RUN at %"Q"u (winoffset %u) size %u\n", 343 XPR(NT "DECODE:%d: RUN at %"Q" (winoffset %"Z") size %"Z"\n",
343 cnt++, 344 cnt++,
344 stream->total_out + stream->dec_position - stream->dec_cpylen, 345 stream->total_out + stream->dec_position - stream->dec_cpylen,
345 stream->dec_position - stream->dec_cpylen, 346 stream->dec_position - stream->dec_cpylen,
@@ -525,8 +526,8 @@ xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst)
525 if ((source->onblk != blksize) && 526 if ((source->onblk != blksize) &&
526 (blkoff + take > source->onblk)) 527 (blkoff + take > source->onblk))
527 { 528 {
528 IF_DEBUG1 (XPR(NT "[srcfile] short at blkno %"Q"u onblk " 529 IF_DEBUG1 (XPR(NT "[srcfile] short at blkno %"Q" onblk "
529 "%u blksize %u blkoff %u take %u\n", 530 "%"Z" blksize %"Z" blkoff %"Z" take %"Z"\n",
530 block, 531 block,
531 source->onblk, 532 source->onblk,
532 blksize, 533 blksize,
@@ -1008,7 +1009,7 @@ xd3_decode_input (xd3_stream *stream)
1008 1009
1009 stream->dec_state = DEC_CPYLEN; 1010 stream->dec_state = DEC_CPYLEN;
1010 1011
1011 IF_DEBUG2 (DP(RINT "--------- TARGET WINDOW %"Q"u -----------\n", 1012 IF_DEBUG2 (DP(RINT "--------- TARGET WINDOW %"Q" -----------\n",
1012 stream->current_window)); 1013 stream->current_window));
1013 } 1014 }
1014 1015
@@ -1025,7 +1026,6 @@ xd3_decode_input (xd3_stream *stream)
1025 1026
1026 case DEC_CPYOFF: 1027 case DEC_CPYOFF:
1027 /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */ 1028 /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */
1028
1029 OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff, 1029 OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff,
1030 DEC_ENCLEN); 1030 DEC_ENCLEN);
1031 1031
@@ -1039,7 +1039,7 @@ xd3_decode_input (xd3_stream *stream)
1039 /* Check copy window bounds: VCD_TARGET window may not exceed 1039 /* Check copy window bounds: VCD_TARGET window may not exceed
1040 current position. */ 1040 current position. */
1041 if ((stream->dec_win_ind & VCD_TARGET) && 1041 if ((stream->dec_win_ind & VCD_TARGET) &&
1042 (stream->dec_cpyoff + (xoff_t) stream->dec_cpylen > 1042 (stream->dec_cpyoff + stream->dec_cpylen >
1043 stream->dec_winstart)) 1043 stream->dec_winstart))
1044 { 1044 {
1045 stream->msg = "VCD_TARGET window out of bounds"; 1045 stream->msg = "VCD_TARGET window out of bounds";
@@ -1179,7 +1179,7 @@ xd3_decode_input (xd3_stream *stream)
1179 /* xd3_decode_emit returns XD3_OUTPUT on every success. */ 1179 /* xd3_decode_emit returns XD3_OUTPUT on every success. */
1180 if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT) 1180 if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT)
1181 { 1181 {
1182 stream->total_out += (xoff_t) stream->avail_out; 1182 stream->total_out += stream->avail_out;
1183 } 1183 }
1184 1184
1185 return ret; 1185 return ret;
diff --git a/xdelta3/xdelta3-djw.h b/xdelta3/xdelta3-djw.h
index f69cb1d..414702a 100644
--- a/xdelta3/xdelta3-djw.h
+++ b/xdelta3/xdelta3-djw.h
@@ -343,7 +343,7 @@ static inline void
343djw_update_1_2 (int *mtf_run, usize_t *mtf_i, 343djw_update_1_2 (int *mtf_run, usize_t *mtf_i,
344 uint8_t *mtfsym, djw_weight *freq) 344 uint8_t *mtfsym, djw_weight *freq)
345{ 345{
346 int code; 346 uint8_t code;
347 347
348 do 348 do
349 { 349 {
@@ -395,10 +395,10 @@ djw_build_prefix (const djw_weight *freq, uint8_t *clen, usize_t asize, usize_t
395 usize_t heap_last; /* Index of the last _valid_ heap entry. */ 395 usize_t heap_last; /* Index of the last _valid_ heap entry. */
396 usize_t ents_size; /* Number of entries, including 0th fake entry */ 396 usize_t ents_size; /* Number of entries, including 0th fake entry */
397 usize_t overflow; /* Number of code lengths that overflow */ 397 usize_t overflow; /* Number of code lengths that overflow */
398 uint32_t total_bits; 398 usize_t total_bits;
399 usize_t i; 399 usize_t i;
400 400
401 IF_DEBUG (uint32_t first_bits = 0); 401 IF_DEBUG (usize_t first_bits = 0);
402 402
403 /* Insert real symbol frequences. */ 403 /* Insert real symbol frequences. */
404 for (i = 0; i < asize; i += 1) 404 for (i = 0; i < asize; i += 1)
@@ -444,7 +444,7 @@ djw_build_prefix (const djw_weight *freq, uint8_t *clen, usize_t asize, usize_t
444 if (heap_last == 1) 444 if (heap_last == 1)
445 { 445 {
446 /* Pick either the first or last symbol. */ 446 /* Pick either the first or last symbol. */
447 int s = freq[0] ? asize-1 : 0; 447 usize_t s = freq[0] ? asize-1 : 0;
448 ents[s+1].freq = 1; 448 ents[s+1].freq = 1;
449 goto again; 449 goto again;
450 } 450 }
@@ -493,8 +493,8 @@ djw_build_prefix (const djw_weight *freq, uint8_t *clen, usize_t asize, usize_t
493 { 493 {
494 IF_DEBUG2 (if (first_bits != total_bits) 494 IF_DEBUG2 (if (first_bits != total_bits)
495 { 495 {
496 DP(RINT "code length overflow changed %u bits\n", 496 DP(RINT "code length overflow changed %"Z" bits\n",
497 (usize_t)(total_bits - first_bits)); 497 total_bits - first_bits);
498 }); 498 });
499 return total_bits; 499 return total_bits;
500 } 500 }
diff --git a/xdelta3/xdelta3-fgk.h b/xdelta3/xdelta3-fgk.h
index 7011500..81d5e7e 100644
--- a/xdelta3/xdelta3-fgk.h
+++ b/xdelta3/xdelta3-fgk.h
@@ -109,7 +109,7 @@ static fgk_stream* fgk_alloc (xd3_stream *stream /*, usize_t alpha
109static int fgk_init (xd3_stream *stream, 109static int fgk_init (xd3_stream *stream,
110 fgk_stream *h, 110 fgk_stream *h,
111 int is_encode); 111 int is_encode);
112static int fgk_encode_data (fgk_stream *h, 112static usize_t fgk_encode_data (fgk_stream *h,
113 usize_t n); 113 usize_t n);
114static inline fgk_bit fgk_get_encoded_bit (fgk_stream *h); 114static inline fgk_bit fgk_get_encoded_bit (fgk_stream *h);
115 115
@@ -125,7 +125,7 @@ static int xd3_encode_fgk (xd3_stream *stream,
125 125
126static inline int fgk_decode_bit (fgk_stream *h, 126static inline int fgk_decode_bit (fgk_stream *h,
127 fgk_bit b); 127 fgk_bit b);
128static int fgk_decode_data (fgk_stream *h); 128static usize_t fgk_decode_data (fgk_stream *h);
129static void fgk_destroy (xd3_stream *stream, 129static void fgk_destroy (xd3_stream *stream,
130 fgk_stream *h); 130 fgk_stream *h);
131 131
@@ -234,7 +234,7 @@ static void fgk_swap_ptrs(fgk_node **one, fgk_node **two)
234 234
235/* Takes huffman transmitter h and n, the nth elt in the alphabet, and 235/* Takes huffman transmitter h and n, the nth elt in the alphabet, and
236 * returns the number of required to encode n. */ 236 * returns the number of required to encode n. */
237static int fgk_encode_data (fgk_stream* h, usize_t n) 237static usize_t fgk_encode_data (fgk_stream* h, usize_t n)
238{ 238{
239 fgk_node *target_ptr = h->alphabet + n; 239 fgk_node *target_ptr = h->alphabet + n;
240 240
@@ -249,8 +249,8 @@ static int fgk_encode_data (fgk_stream* h, usize_t n)
249 * is not neccesary to encode these bits. */ 249 * is not neccesary to encode these bits. */
250 if (IS_ADAPTIVE && target_ptr->weight == 0) 250 if (IS_ADAPTIVE && target_ptr->weight == 0)
251 { 251 {
252 unsigned int where, shift; 252 usize_t where, shift;
253 int bits; 253 usize_t bits;
254 254
255 where = fgk_find_nth_zero(h, n); 255 where = fgk_find_nth_zero(h, n);
256 shift = 1; 256 shift = 1;
@@ -734,7 +734,7 @@ static usize_t fgk_nth_zero (fgk_stream* h, usize_t n)
734 * alphabet otherwise this returns 0, indicating more bits are 734 * alphabet otherwise this returns 0, indicating more bits are
735 * required. 735 * required.
736 */ 736 */
737static int fgk_decode_data (fgk_stream* h) 737static usize_t fgk_decode_data (fgk_stream* h)
738{ 738{
739 usize_t elt = (usize_t)(h->decode_ptr - h->alphabet); 739 usize_t elt = (usize_t)(h->decode_ptr - h->alphabet);
740 740
diff --git a/xdelta3/xdelta3-hash.h b/xdelta3/xdelta3-hash.h
index e359436..c112b5a 100644
--- a/xdelta3/xdelta3-hash.h
+++ b/xdelta3/xdelta3-hash.h
@@ -1,5 +1,6 @@
1/* xdelta 3 - delta compression tools and library 1/* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007. Joshua P. MacDonald 2 * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007, 2011, 2012, 2014.
3 * Joshua P. MacDonald
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -19,6 +20,8 @@
19#ifndef _XDELTA3_HASH_H_ 20#ifndef _XDELTA3_HASH_H_
20#define _XDELTA3_HASH_H_ 21#define _XDELTA3_HASH_H_
21 22
23#include "xdelta3-internal.h"
24
22#if XD3_DEBUG 25#if XD3_DEBUG
23#define SMALL_HASH_DEBUG1(s,inp) \ 26#define SMALL_HASH_DEBUG1(s,inp) \
24 uint32_t debug_state; \ 27 uint32_t debug_state; \
@@ -32,45 +35,18 @@
32#define SMALL_HASH_DEBUG2(s,inp) 35#define SMALL_HASH_DEBUG2(s,inp)
33#endif /* XD3_DEBUG */ 36#endif /* XD3_DEBUG */
34 37
35/* This is a good hash multiplier for 32-bit LCGs: see "linear
36 * congruential generators of different sizes and good lattice
37 * structure" */
38static const uint32_t hash_multiplier = 1597334677U;
39
40/***********************************************************************
41 Permute stuff
42 ***********************************************************************/
43
44#if HASH_PERMUTE == 0
45#define PERMUTE(x) (x)
46#else
47#define PERMUTE(x) (__single_hash[(uint32_t)x])
48
49extern const uint16_t __single_hash[256];
50#endif
51
52/* Update the checksum state. */
53#if ADLER_LARGE_CKSUM
54inline uint32_t
55xd3_large_cksum_update (uint32_t cksum,
56 const uint8_t *base,
57 usize_t look) {
58 uint32_t old_c = PERMUTE(base[0]);
59 uint32_t new_c = PERMUTE(base[look]);
60 uint32_t low = ((cksum & 0xffff) - old_c + new_c) & 0xffff;
61 uint32_t high = ((cksum >> 16) - (old_c * look) + low) & 0xffff;
62 return (high << 16) | low;
63}
64#else
65/* TODO: revisit this topic */
66#endif
67
68#if UNALIGNED_OK 38#if UNALIGNED_OK
69#define UNALIGNED_READ32(dest,src) (*(dest)) = (*(uint32_t*)(src)) 39#define UNALIGNED_READ32(dest,src) (*(dest)) = (*(uint32_t*)(src))
70#else 40#else
71#define UNALIGNED_READ32(dest,src) memcpy((dest), (src), 4); 41#define UNALIGNED_READ32(dest,src) memcpy((dest), (src), 4);
72#endif 42#endif
73 43
44/* These are good hash multipliers for 32-bit and 64-bit LCGs: see
45 * "linear congruential generators of different sizes and good lattice
46 * structure" */
47#define xd3_hash_multiplier32 1597334677U
48#define xd3_hash_multiplier64 1181783497276652981ULL
49
74/* TODO: small cksum is hard-coded for 4 bytes (i.e., "look" is unused) */ 50/* TODO: small cksum is hard-coded for 4 bytes (i.e., "look" is unused) */
75static inline uint32_t 51static inline uint32_t
76xd3_scksum (uint32_t *state, 52xd3_scksum (uint32_t *state,
@@ -78,7 +54,7 @@ xd3_scksum (uint32_t *state,
78 const usize_t look) 54 const usize_t look)
79{ 55{
80 UNALIGNED_READ32(state, base); 56 UNALIGNED_READ32(state, base);
81 return (*state) * hash_multiplier; 57 return (*state) * xd3_hash_multiplier32;
82} 58}
83static inline uint32_t 59static inline uint32_t
84xd3_small_cksum_update (uint32_t *state, 60xd3_small_cksum_update (uint32_t *state,
@@ -86,66 +62,67 @@ xd3_small_cksum_update (uint32_t *state,
86 usize_t look) 62 usize_t look)
87{ 63{
88 UNALIGNED_READ32(state, base+1); 64 UNALIGNED_READ32(state, base+1);
89 return (*state) * hash_multiplier; 65 return (*state) * xd3_hash_multiplier32;
90} 66}
91 67
92/*********************************************************************** 68#if XD3_ENCODER
93 Ctable stuff 69inline usize_t
94 ***********************************************************************/
95
96static inline usize_t
97xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum) 70xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum)
98{ 71{
99 return (cksum >> cfg->shift) ^ (cksum & cfg->mask); 72 return (cksum >> cfg->shift) ^ (cksum & cfg->mask);
100} 73}
101 74
102/*********************************************************************** 75#if SIZEOF_USIZE_T == 4
103 Cksum function 76inline uint32_t
104 ***********************************************************************/ 77xd3_large32_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look)
105
106#if ADLER_LARGE_CKSUM
107static inline uint32_t
108xd3_lcksum (const uint8_t *seg, const usize_t ln)
109{ 78{
110 usize_t i = 0; 79 uint32_t h = 0;
111 uint32_t low = 0; 80 for (usize_t i = 0; i < look; i++) {
112 uint32_t high = 0; 81 h += base[i] * cfg->powers[i];
113 82 }
114 for (; i < ln; i += 1) 83 return h;
115 { 84}
116 low += PERMUTE(*seg++);
117 high += low;
118 }
119 85
120 return ((high & 0xffff) << 16) | (low & 0xffff); 86inline uint32_t
87xd3_large32_cksum_update (xd3_hash_cfg *cfg, const uint32_t cksum,
88 const uint8_t *base, const usize_t look)
89{
90 return xd3_hash_multiplier32 * cksum - cfg->multiplier * base[0] + base[look];
121} 91}
122#else 92#endif
123static inline uint32_t 93
124xd3_lcksum (const uint8_t *seg, const usize_t ln) 94#if SIZEOF_USIZE_T == 8
95inline uint64_t
96xd3_large64_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look)
125{ 97{
126 usize_t i, j; 98 uint64_t h = 0;
127 uint32_t h = 0; 99 for (usize_t i = 0; i < look; i++) {
128 for (i = 0, j = ln - 1; i < ln; ++i, --j) { 100 h += base[i] * cfg->powers[i];
129 h += PERMUTE(seg[i]) * hash_multiplier_powers[j];
130 } 101 }
131 return h; 102 return h;
132} 103}
104
105inline uint64_t
106xd3_large64_cksum_update (xd3_hash_cfg *cfg, const uint64_t cksum,
107 const uint8_t *base, const usize_t look)
108{
109 return xd3_hash_multiplier64 * cksum - cfg->multiplier * base[0] + base[look];
110}
133#endif 111#endif
134 112
135#if XD3_ENCODER
136static usize_t 113static usize_t
137xd3_size_log2 (usize_t slots) 114xd3_size_hashtable_bits (usize_t slots)
138{ 115{
139 int bits = 28; /* This should not be an unreasonable limit. */ 116 usize_t bits = (SIZEOF_USIZE_T * 8) - 1;
140 int i; 117 usize_t i;
141 118
142 for (i = 3; i <= bits; i += 1) 119 for (i = 3; i <= bits; i += 1)
143 { 120 {
144 if (slots < (1U << i)) 121 if (slots < (1U << i))
145 { 122 {
146 /* TODO: this is compaction=1 in checksum_test.cc and maybe should 123 /* Note: this is the compaction=1 setting measured in
147 * not be fixed at -1. */ 124 * checksum_test */
148 bits = i - 1; 125 bits = i - 1;
149 break; 126 break;
150 } 127 }
151 } 128 }
@@ -153,18 +130,34 @@ xd3_size_log2 (usize_t slots)
153 return bits; 130 return bits;
154} 131}
155 132
156static void 133int
157xd3_size_hashtable (xd3_stream *stream, 134xd3_size_hashtable (xd3_stream *stream,
158 usize_t slots, 135 usize_t slots,
159 xd3_hash_cfg *cfg) 136 usize_t look,
137 xd3_hash_cfg *cfg)
160{ 138{
161 int bits = xd3_size_log2 (slots); 139 usize_t bits = xd3_size_hashtable_bits (slots);
162 140
163 /* TODO: there's a 32-bit assumption here */ 141 cfg->size = (1U << bits);
164 cfg->size = (1 << bits);
165 cfg->mask = (cfg->size - 1); 142 cfg->mask = (cfg->size - 1);
166 cfg->shift = 32 - bits; 143 cfg->shift = (SIZEOF_USIZE_T * 8) - bits;
144 cfg->look = look;
145
146 if ((cfg->powers =
147 (usize_t*) xd3_alloc0 (stream, look, sizeof (usize_t))) == NULL)
148 {
149 return ENOMEM;
150 }
151
152 cfg->powers[look-1] = 1;
153 for (int i = look-2; i >= 0; i--)
154 {
155 cfg->powers[i] = cfg->powers[i+1] * xd3_hash_multiplier;
156 }
157 cfg->multiplier = cfg->powers[0] * xd3_hash_multiplier;
158
159 return 0;
167} 160}
168#endif
169 161
170#endif 162#endif /* XD3_ENCODER */
163#endif /* _XDELTA3_HASH_H_ */
diff --git a/xdelta3/xdelta3-internal.h b/xdelta3/xdelta3-internal.h
index eb360be..e9fc5f3 100644
--- a/xdelta3/xdelta3-internal.h
+++ b/xdelta3/xdelta3-internal.h
@@ -31,6 +31,7 @@ void main_file_cleanup (main_file *xfile);
31int main_file_isopen (main_file *xfile); 31int main_file_isopen (main_file *xfile);
32int main_file_open (main_file *xfile, const char* name, int mode); 32int main_file_open (main_file *xfile, const char* name, int mode);
33int main_file_exists (main_file *xfile); 33int main_file_exists (main_file *xfile);
34int main_file_stat (main_file *xfile, xoff_t *size);
34int xd3_whole_append_window (xd3_stream *stream); 35int xd3_whole_append_window (xd3_stream *stream);
35int xd3_main_cmdline (int argc, char **argv); 36int xd3_main_cmdline (int argc, char **argv);
36int main_file_read (main_file *ifile, 37int main_file_read (main_file *ifile,
@@ -40,9 +41,13 @@ int main_file_read (main_file *ifile,
40 const char *msg); 41 const char *msg);
41int main_file_write (main_file *ofile, uint8_t *buf, 42int main_file_write (main_file *ofile, uint8_t *buf,
42 usize_t size, const char *msg); 43 usize_t size, const char *msg);
44void* main_malloc (size_t size);
45void main_free (void *ptr);
46
43int test_compare_files (const char* f0, const char* f1); 47int test_compare_files (const char* f0, const char* f1);
44usize_t xd3_bytes_on_srcblk (xd3_source *src, xoff_t blkno); 48usize_t xd3_bytes_on_srcblk (xd3_source *src, xoff_t blkno);
45xoff_t xd3_source_eof(const xd3_source *src); 49xoff_t xd3_source_eof(const xd3_source *src);
50
46uint32_t xd3_large_cksum_update (uint32_t cksum, 51uint32_t xd3_large_cksum_update (uint32_t cksum,
47 const uint8_t *base, 52 const uint8_t *base,
48 usize_t look); 53 usize_t look);
@@ -58,7 +63,8 @@ xd3_output* xd3_alloc_output (xd3_stream *stream,
58 xd3_output *old_output); 63 xd3_output *old_output);
59 64
60int xd3_encode_init_full (xd3_stream *stream); 65int xd3_encode_init_full (xd3_stream *stream);
61size_t xd3_pow2_roundup (size_t x); 66usize_t xd3_pow2_roundup (usize_t x);
67long get_millisecs_now ();
62int xd3_process_stream (int is_encode, 68int xd3_process_stream (int is_encode,
63 xd3_stream *stream, 69 xd3_stream *stream,
64 int (*func) (xd3_stream *), 70 int (*func) (xd3_stream *),
@@ -73,6 +79,10 @@ int xd3_process_stream (int is_encode,
73int xd3_main_cmdline (int argc, char **argv); 79int xd3_main_cmdline (int argc, char **argv);
74#endif 80#endif
75 81
82#if REGRESSION_TEST
83int xd3_selftest (void);
84#endif
85
76/* main_file->mode values */ 86/* main_file->mode values */
77typedef enum 87typedef enum
78{ 88{
@@ -149,14 +159,13 @@ typedef struct {
149 * therefore always define xsnprintf_func. */ 159 * therefore always define xsnprintf_func. */
150#undef PRINTF_ATTRIBUTE 160#undef PRINTF_ATTRIBUTE
151#ifdef __GNUC__ 161#ifdef __GNUC__
152/* Let's just assume no one uses gcc 2.x! */
153#define PRINTF_ATTRIBUTE(x,y) __attribute__ ((__format__ (__printf__, x, y))) 162#define PRINTF_ATTRIBUTE(x,y) __attribute__ ((__format__ (__printf__, x, y)))
154#else 163#else
155#define PRINTF_ATTRIBUTE(x,y) 164#define PRINTF_ATTRIBUTE(x,y)
156#endif 165#endif
157 166
158/* Underlying xprintf() */ 167/* Underlying xprintf() */
159int xsnprintf_func (char *str, int n, const char *fmt, ...) 168int xsnprintf_func (char *str, size_t n, const char *fmt, ...)
160 PRINTF_ATTRIBUTE(3,4); 169 PRINTF_ATTRIBUTE(3,4);
161 170
162/* XPR(NT "", ...) (used by main) prefixes an "xdelta3: " to the output. */ 171/* XPR(NT "", ...) (used by main) prefixes an "xdelta3: " to the output. */
@@ -288,28 +297,11 @@ xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp,
288static inline int 297static inline int
289xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num) 298xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num)
290{ EMIT_INTEGER_TYPE (); } 299{ EMIT_INTEGER_TYPE (); }
291#endif 300#endif /* XD3_ENCODER */
292#endif 301#endif /* USE_UINT32 */
293 302
294#if USE_UINT64 303#if USE_UINT64
295static inline int 304static inline uint32_t
296xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val)
297{ DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); }
298
299#if XD3_ENCODER
300static inline int
301xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num)
302{ EMIT_INTEGER_TYPE (); }
303#endif
304
305/* These are tested but not used */
306#if REGRESSION_TEST
307static int
308xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp,
309 const uint8_t *maxp, uint64_t *valp)
310{ READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); }
311
312static uint32_t
313xd3_sizeof_uint64_t (uint64_t num) 305xd3_sizeof_uint64_t (uint64_t num)
314{ 306{
315 IF_SIZEOF64(1); 307 IF_SIZEOF64(1);
@@ -324,49 +316,110 @@ xd3_sizeof_uint64_t (uint64_t num)
324 316
325 return 10; 317 return 10;
326} 318}
327#endif
328 319
329#endif 320static inline int
321xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val)
322{ DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); }
323
324static inline int
325xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp,
326 const uint8_t *maxp, uint64_t *valp)
327{ READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); }
328
329#if XD3_ENCODER
330static inline int
331xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num)
332{ EMIT_INTEGER_TYPE (); }
333#endif /* XD3_ENCODER */
334#endif /* USE_UINT64 */
330 335
331#if SIZEOF_USIZE_T == 4 336#if SIZEOF_USIZE_T == 4
332#define USIZE_T_MAX UINT32_MAX 337#define USIZE_T_MAX UINT32_MAX
333#define USIZE_T_MAXBLKSZ 0x80000000U 338#define USIZE_T_MAXBLKSZ 0x80000000U
334#define xd3_decode_size xd3_decode_uint32_t 339#define XD3_MAXSRCWINSZ (1ULL << 31)
335#define xd3_emit_size xd3_emit_uint32_t 340#define xd3_large_cksum xd3_large32_cksum
336#define xd3_sizeof_size xd3_sizeof_uint32_t 341#define xd3_large_cksum_update xd3_large32_cksum_update
337#define xd3_read_size xd3_read_uint32_t 342#define xd3_hash_multiplier xd3_hash_multiplier32
343
344static inline uint32_t xd3_sizeof_size (usize_t num)
345{ return xd3_sizeof_uint32_t (num); }
346static inline int xd3_decode_size (xd3_stream *stream, usize_t *valp)
347{ return xd3_decode_uint32_t (stream, (uint32_t*) valp); }
348static inline int xd3_read_size (xd3_stream *stream, const uint8_t **inpp,
349 const uint8_t *maxp, usize_t *valp)
350{ return xd3_read_uint32_t (stream, inpp, maxp, (uint32_t*) valp); }
351#if XD3_ENCODER
352static inline int xd3_emit_size (xd3_stream *stream, xd3_output **output, usize_t num)
353{ return xd3_emit_uint32_t (stream, output, num); }
354#endif
355
338#elif SIZEOF_USIZE_T == 8 356#elif SIZEOF_USIZE_T == 8
339#define USIZE_T_MAX UINT64_MAX 357#define USIZE_T_MAX UINT64_MAX
340#define USIZE_T_MAXBLKSZ 0x8000000000000000ULL 358#define USIZE_T_MAXBLKSZ 0x8000000000000000ULL
341#define xd3_decode_size xd3_decode_uint64_t 359#define XD3_MAXSRCWINSZ (1ULL << 61)
342#define xd3_emit_size xd3_emit_uint64_t 360#define xd3_large_cksum xd3_large64_cksum
343#define xd3_sizeof_size xd3_sizeof_uint64_t 361#define xd3_large_cksum_update xd3_large64_cksum_update
344#define xd3_read_size xd3_read_uint64_t 362#define xd3_hash_multiplier xd3_hash_multiplier64
363
364static inline uint32_t xd3_sizeof_size (usize_t num)
365{ return xd3_sizeof_uint64_t (num); }
366static inline int xd3_decode_size (xd3_stream *stream, usize_t *valp)
367{ return xd3_decode_uint64_t (stream, (uint64_t*) valp); }
368static inline int xd3_read_size (xd3_stream *stream, const uint8_t **inpp,
369 const uint8_t *maxp, usize_t *valp)
370{ return xd3_read_uint64_t (stream, inpp, maxp, (uint64_t*) valp); }
371#if XD3_ENCODER
372static inline int xd3_emit_size (xd3_stream *stream, xd3_output **output, usize_t num)
373{ return xd3_emit_uint64_t (stream, output, num); }
345#endif 374#endif
346 375
376#endif /* SIZEOF_USIZE_T */
377
347#if SIZEOF_XOFF_T == 4 378#if SIZEOF_XOFF_T == 4
348#define XOFF_T_MAX UINT32_MAX 379#define XOFF_T_MAX UINT32_MAX
349#define xd3_emit_offset xd3_emit_uint32_t 380
350static inline int 381static inline int xd3_decode_offset (xd3_stream *stream, xoff_t *valp)
351xd3_decode_offset (xd3_stream *stream, xoff_t *val) 382{ return xd3_decode_uint32_t (stream, (uint32_t*) valp); }
352{ 383#if XD3_ENCODER
353 return xd3_decode_uint32_t (stream, (uint32_t*) val); 384static inline int xd3_emit_offset (xd3_stream *stream, xd3_output **output, xoff_t num)
354} 385{ return xd3_emit_uint32_t (stream, output, num); }
386#endif
387
355#elif SIZEOF_XOFF_T == 8 388#elif SIZEOF_XOFF_T == 8
356#define XOFF_T_MAX UINT64_MAX 389#define XOFF_T_MAX UINT64_MAX
357#define xd3_emit_offset xd3_emit_uint64_t 390
358static inline int 391static inline int xd3_decode_offset (xd3_stream *stream, xoff_t *valp)
359xd3_decode_offset (xd3_stream *stream, xoff_t *val) 392{ return xd3_decode_uint64_t (stream, (uint64_t*) valp); }
360{ 393#if XD3_ENCODER
361 return xd3_decode_uint64_t (stream, (uint64_t*) val); 394static inline int xd3_emit_offset (xd3_stream *stream, xd3_output **output, xoff_t num)
362} 395{ return xd3_emit_uint64_t (stream, output, num); }
396#endif
397
363#endif 398#endif
364 399
365#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b)) 400#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b))
366#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b)) 401#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b))
367 402
403int xd3_size_hashtable (xd3_stream *stream,
404 usize_t slots,
405 usize_t look,
406 xd3_hash_cfg *cfg);
407
408usize_t xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum);
409
410#if USE_UINT32
411uint32_t xd3_large32_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
412uint32_t xd3_large32_cksum_update (xd3_hash_cfg *cfg, uint32_t cksum,
413 const uint8_t *base, const usize_t look);
414#endif /* USE_UINT32 */
415
416#if USE_UINT64
417uint64_t xd3_large64_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
418uint64_t xd3_large64_cksum_update (xd3_hash_cfg *cfg, uint64_t cksum,
419 const uint8_t *base, const usize_t look);
420#endif /* USE_UINT64 */
421
368#define MAX_LRU_SIZE 32U 422#define MAX_LRU_SIZE 32U
369#define XD3_MINSRCWINSZ (XD3_ALLOCSIZE * MAX_LRU_SIZE) 423#define XD3_MINSRCWINSZ (XD3_ALLOCSIZE * MAX_LRU_SIZE)
370#define XD3_MAXSRCWINSZ (1ULL << 31)
371 424
372#endif // XDELTA3_INTERNAL_H__ 425#endif // XDELTA3_INTERNAL_H__
diff --git a/xdelta3/xdelta3-list.h b/xdelta3/xdelta3-list.h
index 6e3125f..2c8f3ef 100644
--- a/xdelta3/xdelta3-list.h
+++ b/xdelta3/xdelta3-list.h
@@ -115,7 +115,7 @@ static inline usize_t \
115LTYPE ## _length (LTYPE *l) \ 115LTYPE ## _length (LTYPE *l) \
116{ \ 116{ \
117 LTYPE *p; \ 117 LTYPE *p; \
118 int c = 0; \ 118 usize_t c = 0; \
119 \ 119 \
120 for (p = l->next; p != l; p = p->next) \ 120 for (p = l->next; p != l; p = p->next) \
121 { \ 121 { \
diff --git a/xdelta3/xdelta3-lzma.h b/xdelta3/xdelta3-lzma.h
index bcf2ddf..de317ee 100644
--- a/xdelta3/xdelta3-lzma.h
+++ b/xdelta3/xdelta3-lzma.h
@@ -55,7 +55,8 @@ xd3_lzma_init (xd3_stream *stream, xd3_lzma_stream *sec, int is_encode)
55 55
56 if (is_encode) 56 if (is_encode)
57 { 57 {
58 int preset = (stream->flags & XD3_COMPLEVEL_MASK) >> XD3_COMPLEVEL_SHIFT; 58 uint32_t preset =
59 (stream->flags & XD3_COMPLEVEL_MASK) >> XD3_COMPLEVEL_SHIFT;
59 60
60 if (lzma_lzma_preset(&sec->options, preset)) 61 if (lzma_lzma_preset(&sec->options, preset))
61 { 62 {
diff --git a/xdelta3/xdelta3-main.h b/xdelta3/xdelta3-main.h
index fac5d8d..1a3d95e 100644
--- a/xdelta3/xdelta3-main.h
+++ b/xdelta3/xdelta3-main.h
@@ -49,13 +49,28 @@
49 49
50/*********************************************************************/ 50/*********************************************************************/
51 51
52#include <limits.h>
53
54#ifndef XD3_POSIX
55#define XD3_POSIX 0
56#endif
57#ifndef XD3_STDIO
58#define XD3_STDIO 0
59#endif
60#ifndef XD3_WIN32
61#define XD3_WIN32 0
62#endif
63#ifndef NOT_MAIN
64#define NOT_MAIN 0
65#endif
66
52/* Combines xd3_strerror() and strerror() */ 67/* Combines xd3_strerror() and strerror() */
53const char* xd3_mainerror(int err_num); 68const char* xd3_mainerror(int err_num);
54 69
55#include "xdelta3-internal.h" 70#include "xdelta3-internal.h"
56 71
57int 72int
58xsnprintf_func (char *str, int n, const char *fmt, ...) 73xsnprintf_func (char *str, size_t n, const char *fmt, ...)
59{ 74{
60 va_list a; 75 va_list a;
61 int ret; 76 int ret;
@@ -230,9 +245,8 @@ static const char *option_source_filename = NULL;
230static int option_level = XD3_DEFAULT_LEVEL; 245static int option_level = XD3_DEFAULT_LEVEL;
231static usize_t option_iopt_size = XD3_DEFAULT_IOPT_SIZE; 246static usize_t option_iopt_size = XD3_DEFAULT_IOPT_SIZE;
232static usize_t option_winsize = XD3_DEFAULT_WINSIZE; 247static usize_t option_winsize = XD3_DEFAULT_WINSIZE;
233/* Note: option_srcwinsz is restricted from [16Kb, 4Gb], because 248
234 * addresses in the large hash checksum are 32 bits. The flag is read 249/* option_srcwinsz is restricted to [16kB, 2GB] when usize_t is 32 bits. */
235 * as xoff_t, so that 4Gb != 0. */
236static xoff_t option_srcwinsz = XD3_DEFAULT_SRCWINSZ; 250static xoff_t option_srcwinsz = XD3_DEFAULT_SRCWINSZ;
237static usize_t option_sprevsz = XD3_DEFAULT_SPREVSZ; 251static usize_t option_sprevsz = XD3_DEFAULT_SPREVSZ;
238 252
@@ -288,10 +302,6 @@ static void main_get_appheader (xd3_stream *stream, main_file *ifile,
288static int main_getblk_func (xd3_stream *stream, 302static int main_getblk_func (xd3_stream *stream,
289 xd3_source *source, 303 xd3_source *source,
290 xoff_t blkno); 304 xoff_t blkno);
291static void main_free (void *ptr);
292static void* main_malloc (size_t size);
293
294static int main_file_stat (main_file *xfile, xoff_t *size);
295static int main_file_seek (main_file *xfile, xoff_t pos); 305static int main_file_seek (main_file *xfile, xoff_t pos);
296static int main_read_primary_input (main_file *file, 306static int main_read_primary_input (main_file *file,
297 uint8_t *buf, 307 uint8_t *buf,
@@ -364,6 +374,7 @@ main_config (void)
364 XPR(NTR "XD3_STDIO=%d\n", XD3_STDIO); 374 XPR(NTR "XD3_STDIO=%d\n", XD3_STDIO);
365 XPR(NTR "XD3_WIN32=%d\n", XD3_WIN32); 375 XPR(NTR "XD3_WIN32=%d\n", XD3_WIN32);
366 XPR(NTR "XD3_USE_LARGEFILE64=%d\n", XD3_USE_LARGEFILE64); 376 XPR(NTR "XD3_USE_LARGEFILE64=%d\n", XD3_USE_LARGEFILE64);
377 XPR(NTR "XD3_USE_LARGESIZET=%d\n", XD3_USE_LARGESIZET);
367 XPR(NTR "XD3_DEFAULT_LEVEL=%d\n", XD3_DEFAULT_LEVEL); 378 XPR(NTR "XD3_DEFAULT_LEVEL=%d\n", XD3_DEFAULT_LEVEL);
368 XPR(NTR "XD3_DEFAULT_IOPT_SIZE=%d\n", XD3_DEFAULT_IOPT_SIZE); 379 XPR(NTR "XD3_DEFAULT_IOPT_SIZE=%d\n", XD3_DEFAULT_IOPT_SIZE);
369 XPR(NTR "XD3_DEFAULT_SPREVSZ=%d\n", XD3_DEFAULT_SPREVSZ); 380 XPR(NTR "XD3_DEFAULT_SPREVSZ=%d\n", XD3_DEFAULT_SPREVSZ);
@@ -440,7 +451,7 @@ void* main_bufalloc (size_t size) {
440#endif 451#endif
441} 452}
442 453
443static void* 454void*
444main_malloc (size_t size) 455main_malloc (size_t size)
445{ 456{
446 void *r = main_malloc1 (size); 457 void *r = main_malloc1 (size);
@@ -462,7 +473,7 @@ main_free1 (void *opaque, void *ptr)
462 free (ptr); 473 free (ptr);
463} 474}
464 475
465static void 476void
466main_free (void *ptr) 477main_free (void *ptr)
467{ 478{
468 if (ptr) 479 if (ptr)
@@ -533,7 +544,7 @@ xd3_mainerror(int err_num) {
533#endif 544#endif
534} 545}
535 546
536static long 547long
537get_millisecs_now (void) 548get_millisecs_now (void)
538{ 549{
539#ifndef _WIN32 550#ifndef _WIN32
@@ -656,14 +667,17 @@ main_strtoxoff (const char* s, xoff_t *xo, char which)
656 XD3_ASSERT(s && *s != 0); 667 XD3_ASSERT(s && *s != 0);
657 668
658 { 669 {
659 /* Should check LONG_MIN, LONG_MAX, LLONG_MIN, LLONG_MAX? */ 670#if SIZEOF_XOFF_T == SIZEOF_UNSIGNED_LONG_LONG
660#if SIZEOF_XOFF_T == 4 671 unsigned long long xx = strtoull (s, &e, 0);
661 long xx = strtol (s, &e, 0); 672 unsigned long long bad = ULLONG_MAX;
673#elif SIZEOF_XOFF_T <= SIZEOF_UNSIGNED_LONG
674 unsigned long xx = strtoul (s, &e, 0);
675 unsigned long long bad = ULONG_MAX;
662#else 676#else
663 long long xx = strtoll (s, &e, 0); 677 /* Something wrong with SIZEOF_XOFF_T, SIZEOF_UNSIGNED_LONG, etc. */
664#endif 678#endif
665 679
666 if (xx < 0) 680 if (xx == bad)
667 { 681 {
668 XPR(NT "-%c: negative integer: %s\n", which, s); 682 XPR(NT "-%c: negative integer: %s\n", which, s);
669 return EXIT_FAILURE; 683 return EXIT_FAILURE;
@@ -1093,7 +1107,7 @@ main_file_write (main_file *ofile, uint8_t *buf, usize_t size, const char *msg)
1093 } 1107 }
1094 else 1108 else
1095 { 1109 {
1096 if (option_verbose > 5) { XPR(NT "write %s: %u bytes\n", 1110 if (option_verbose > 5) { XPR(NT "write %s: %"W"u bytes\n",
1097 ofile->filename, size); } 1111 ofile->filename, size); }
1098 ofile->nwrite += size; 1112 ofile->nwrite += size;
1099 } 1113 }
@@ -1231,6 +1245,15 @@ main_set_secondary_flags (xd3_config *config)
1231 } 1245 }
1232 } 1246 }
1233 1247
1248 if (option_verbose)
1249 {
1250 XPR(NT "secondary compression: %s\n",
1251 (config->flags | XD3_SEC_LZMA) ? "lzma" :
1252 ((config->flags | XD3_SEC_FGK) ? "fgk" :
1253 ((config->flags | XD3_SEC_DJW) ? "djw" :
1254 "none")));
1255 }
1256
1234 return 0; 1257 return 0;
1235} 1258}
1236 1259
@@ -1291,7 +1314,8 @@ main_print_window (xd3_stream* stream, main_file *xfile)
1291 addr_bytes = (usize_t)(stream->addr_sect.buf - addr_before); 1314 addr_bytes = (usize_t)(stream->addr_sect.buf - addr_before);
1292 inst_bytes = (usize_t)(stream->inst_sect.buf - inst_before); 1315 inst_bytes = (usize_t)(stream->inst_sect.buf - inst_before);
1293 1316
1294 VC(UT " %06"Q"u %03u %s %6u", stream->dec_winstart + size, 1317 VC(UT " %06"Q"u %03"W"u %s %6"W"u",
1318 stream->dec_winstart + size,
1295 option_print_cpymode ? code : 0, 1319 option_print_cpymode ? code : 0,
1296 xd3_rtype_to_string ((xd3_rtype) stream->dec_current1.type, 1320 xd3_rtype_to_string ((xd3_rtype) stream->dec_current1.type,
1297 option_print_cpymode), 1321 option_print_cpymode),
@@ -1303,7 +1327,7 @@ main_print_window (xd3_stream* stream, main_file *xfile)
1303 { 1327 {
1304 if (stream->dec_current1.addr >= stream->dec_cpylen) 1328 if (stream->dec_current1.addr >= stream->dec_cpylen)
1305 { 1329 {
1306 VC(UT " T@%-6u", 1330 VC(UT " T@%-6"W"u",
1307 stream->dec_current1.addr - stream->dec_cpylen)VE; 1331 stream->dec_current1.addr - stream->dec_cpylen)VE;
1308 } 1332 }
1309 else 1333 else
@@ -1322,7 +1346,7 @@ main_print_window (xd3_stream* stream, main_file *xfile)
1322 1346
1323 if (stream->dec_current2.type != XD3_NOOP) 1347 if (stream->dec_current2.type != XD3_NOOP)
1324 { 1348 {
1325 VC(UT " %s %6u", 1349 VC(UT " %s %6"W"u",
1326 xd3_rtype_to_string ((xd3_rtype) stream->dec_current2.type, 1350 xd3_rtype_to_string ((xd3_rtype) stream->dec_current2.type,
1327 option_print_cpymode), 1351 option_print_cpymode),
1328 stream->dec_current2.size)VE; 1352 stream->dec_current2.size)VE;
@@ -1331,7 +1355,7 @@ main_print_window (xd3_stream* stream, main_file *xfile)
1331 { 1355 {
1332 if (stream->dec_current2.addr >= stream->dec_cpylen) 1356 if (stream->dec_current2.addr >= stream->dec_cpylen)
1333 { 1357 {
1334 VC(UT " T@%-6u", 1358 VC(UT " T@%-6"W"u",
1335 stream->dec_current2.addr - stream->dec_cpylen)VE; 1359 stream->dec_current2.addr - stream->dec_cpylen)VE;
1336 } 1360 }
1337 else 1361 else
@@ -1351,7 +1375,7 @@ main_print_window (xd3_stream* stream, main_file *xfile)
1351 (stream->dec_current1.type >= XD3_CPY || 1375 (stream->dec_current1.type >= XD3_CPY ||
1352 stream->dec_current2.type >= XD3_CPY)) 1376 stream->dec_current2.type >= XD3_CPY))
1353 { 1377 {
1354 VC(UT " %06"Q"u (inefficiency) %u encoded as %u bytes\n", 1378 VC(UT " %06"Q"u (inefficiency) %"W"u encoded as %"W"u bytes\n",
1355 stream->dec_winstart + size_before, 1379 stream->dec_winstart + size_before,
1356 size - size_before, 1380 size - size_before,
1357 addr_bytes + inst_bytes)VE; 1381 addr_bytes + inst_bytes)VE;
@@ -1419,7 +1443,7 @@ main_print_func (xd3_stream* stream, main_file *xfile)
1419 if (stream->dec_winstart == 0) 1443 if (stream->dec_winstart == 0)
1420 { 1444 {
1421 VC(UT "VCDIFF version: 0\n")VE; 1445 VC(UT "VCDIFF version: 0\n")VE;
1422 VC(UT "VCDIFF header size: %d\n", 1446 VC(UT "VCDIFF header size: %"W"u\n",
1423 stream->dec_hdrsize)VE; 1447 stream->dec_hdrsize)VE;
1424 VC(UT "VCDIFF header indicator: ")VE; 1448 VC(UT "VCDIFF header indicator: ")VE;
1425 if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0) 1449 if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0)
@@ -1485,7 +1509,7 @@ main_print_func (xd3_stream* stream, main_file *xfile)
1485 if ((stream->dec_win_ind & VCD_ADLER32) != 0) 1509 if ((stream->dec_win_ind & VCD_ADLER32) != 0)
1486 { 1510 {
1487 VC(UT "VCDIFF adler32 checksum: %08X\n", 1511 VC(UT "VCDIFF adler32 checksum: %08X\n",
1488 (usize_t)stream->dec_adler32)VE; 1512 stream->dec_adler32)VE;
1489 } 1513 }
1490 1514
1491 if (stream->dec_del_ind != 0) 1515 if (stream->dec_del_ind != 0)
@@ -1505,22 +1529,22 @@ main_print_func (xd3_stream* stream, main_file *xfile)
1505 1529
1506 if (SRCORTGT (stream->dec_win_ind)) 1530 if (SRCORTGT (stream->dec_win_ind))
1507 { 1531 {
1508 VC(UT "VCDIFF copy window length: %u\n", 1532 VC(UT "VCDIFF copy window length: %"W"u\n",
1509 (usize_t)stream->dec_cpylen)VE; 1533 stream->dec_cpylen)VE;
1510 VC(UT "VCDIFF copy window offset: %"Q"u\n", 1534 VC(UT "VCDIFF copy window offset: %"Q"u\n",
1511 stream->dec_cpyoff)VE; 1535 stream->dec_cpyoff)VE;
1512 } 1536 }
1513 1537
1514 VC(UT "VCDIFF delta encoding length: %u\n", 1538 VC(UT "VCDIFF delta encoding length: %"W"u\n",
1515 (usize_t)stream->dec_enclen)VE; 1539 (usize_t)stream->dec_enclen)VE;
1516 VC(UT "VCDIFF target window length: %u\n", 1540 VC(UT "VCDIFF target window length: %"W"u\n",
1517 (usize_t)stream->dec_tgtlen)VE; 1541 (usize_t)stream->dec_tgtlen)VE;
1518 1542
1519 VC(UT "VCDIFF data section length: %u\n", 1543 VC(UT "VCDIFF data section length: %"W"u\n",
1520 (usize_t)stream->data_sect.size)VE; 1544 (usize_t)stream->data_sect.size)VE;
1521 VC(UT "VCDIFF inst section length: %u\n", 1545 VC(UT "VCDIFF inst section length: %"W"u\n",
1522 (usize_t)stream->inst_sect.size)VE; 1546 (usize_t)stream->inst_sect.size)VE;
1523 VC(UT "VCDIFF addr section length: %u\n", 1547 VC(UT "VCDIFF addr section length: %"W"u\n",
1524 (usize_t)stream->addr_sect.size)VE; 1548 (usize_t)stream->addr_sect.size)VE;
1525 1549
1526 ret = 0; 1550 ret = 0;
@@ -1960,8 +1984,11 @@ main_merge_output (xd3_stream *stream, main_file *ofile)
1960 XD3_ASSERT (inst->addr >= window_start); 1984 XD3_ASSERT (inst->addr >= window_start);
1961 addr = inst->addr - window_start; 1985 addr = inst->addr - window_start;
1962 } 1986 }
1963 IF_DEBUG2 (XPR(NTR "[merge copy] winpos %u take %u addr %"Q"u mode %u\n", 1987 IF_DEBUG2 ({
1964 window_pos, take, addr, inst->mode)); 1988 XPR(NTR "[merge copy] winpos %"W" take %"W" "
1989 "addr %"Q" mode %u\n",
1990 window_pos, take, addr, inst->mode);
1991 });
1965 if ((ret = xd3_found_match (recode_stream, window_pos, take, 1992 if ((ret = xd3_found_match (recode_stream, window_pos, take,
1966 addr, inst->mode != 0))) 1993 addr, inst->mode != 0)))
1967 { 1994 {
@@ -3034,7 +3061,7 @@ main_input (xd3_cmd cmd,
3034 { 3061 {
3035 const char *s = option_smatch_config; 3062 const char *s = option_smatch_config;
3036 char *e; 3063 char *e;
3037 int values[XD3_SOFTCFG_VARCNT]; 3064 long values[XD3_SOFTCFG_VARCNT];
3038 int got; 3065 int got;
3039 3066
3040 config.smatch_cfg = XD3_SMATCH_SOFT; 3067 config.smatch_cfg = XD3_SMATCH_SOFT;
@@ -3291,7 +3318,7 @@ main_input (xd3_cmd cmd,
3291 stream.i_slots_used > stream.iopt_size) 3318 stream.i_slots_used > stream.iopt_size)
3292 { 3319 {
3293 XPR(NT "warning: input position %"Q"u overflowed " 3320 XPR(NT "warning: input position %"Q"u overflowed "
3294 "instruction buffer, needed %u (vs. %u), " 3321 "instruction buffer, needed %"W"u (vs. %"W"u), "
3295 "consider changing -I\n", 3322 "consider changing -I\n",
3296 stream.current_window * winsize, 3323 stream.current_window * winsize,
3297 stream.i_slots_used, stream.iopt_size); 3324 stream.i_slots_used, stream.iopt_size);
@@ -3416,10 +3443,10 @@ done:
3416 if (option_verbose > 1 && cmd == CMD_ENCODE) 3443 if (option_verbose > 1 && cmd == CMD_ENCODE)
3417 { 3444 {
3418 XPR(NT "scanner configuration: %s\n", stream.smatcher.name); 3445 XPR(NT "scanner configuration: %s\n", stream.smatcher.name);
3419 XPR(NT "target hash table size: %u\n", stream.small_hash.size); 3446 XPR(NT "target hash table size: %"W"u\n", stream.small_hash.size);
3420 if (sfile != NULL && sfile->filename != NULL) 3447 if (sfile != NULL && sfile->filename != NULL)
3421 { 3448 {
3422 XPR(NT "source hash table size: %u\n", stream.large_hash.size); 3449 XPR(NT "source hash table size: %"W"u\n", stream.large_hash.size);
3423 } 3450 }
3424 } 3451 }
3425 3452
diff --git a/xdelta3/xdelta3-second.h b/xdelta3/xdelta3-second.h
index e8a2c8f..899902e 100644
--- a/xdelta3/xdelta3-second.h
+++ b/xdelta3/xdelta3-second.h
@@ -122,7 +122,7 @@ xd3_decode_secondary (xd3_stream *stream,
122 xd3_desect *sect, 122 xd3_desect *sect,
123 xd3_sec_stream **sec_streamp) 123 xd3_sec_stream **sec_streamp)
124{ 124{
125 uint32_t dec_size; 125 usize_t dec_size;
126 uint8_t *out_used; 126 uint8_t *out_used;
127 int ret; 127 int ret;
128 128
diff --git a/xdelta3/xdelta3-test.h b/xdelta3/xdelta3-test.h
index 6d7d85b..335de28 100644
--- a/xdelta3/xdelta3-test.h
+++ b/xdelta3/xdelta3-test.h
@@ -139,7 +139,10 @@ static char TEST_RECON2_FILE[TESTFILESIZE];
139static char TEST_COPY_FILE[TESTFILESIZE]; 139static char TEST_COPY_FILE[TESTFILESIZE];
140static char TEST_NOPERM_FILE[TESTFILESIZE]; 140static char TEST_NOPERM_FILE[TESTFILESIZE];
141 141
142#define CHECK(cond) if (!(cond)) { XPR(NT "check failure: " #cond); abort(); } 142#define CHECK(cond) \
143 if (!(cond)) { \
144 XPR(NT __FILE__":%d: check failure: " #cond, __LINE__); \
145 abort(); }
143 146
144#if SHELL_TESTS 147#if SHELL_TESTS
145/* Use a fixed soft config so that test values are fixed. See also 148/* Use a fixed soft config so that test values are fixed. See also
@@ -199,7 +202,7 @@ test_random_numbers (xd3_stream *stream, int ignore)
199 202
200 for (i = 0; i < n_rounds; i += 1) 203 for (i = 0; i < n_rounds; i += 1)
201 { 204 {
202 sum += mt_exp_rand (mean, USIZE_T_MAX); 205 sum += mt_exp_rand (mean, UINT32_MAX);
203 } 206 }
204 207
205 average = (double) sum / (double) n_rounds; 208 average = (double) sum / (double) n_rounds;
@@ -704,13 +707,13 @@ test_forward_match (xd3_stream *stream, int unused)
704 707
705 for (i = 0; i < 256; i++) 708 for (i = 0; i < 256; i++)
706 { 709 {
707 CHECK(xd3_forward_match(buf1, buf2, i) == (int)i); 710 CHECK(xd3_forward_match(buf1, buf2, i) == i);
708 } 711 }
709 712
710 for (i = 0; i < 255; i++) 713 for (i = 0; i < 255; i++)
711 { 714 {
712 buf2[i] = 1; 715 buf2[i] = 1;
713 CHECK(xd3_forward_match(buf1, buf2, 256) == (int)i); 716 CHECK(xd3_forward_match(buf1, buf2, 256) == i);
714 buf2[i] = 0; 717 buf2[i] = 0;
715 } 718 }
716 719
@@ -759,7 +762,7 @@ test_address_cache (xd3_stream *stream, int unused)
759 usize_t prev_i; 762 usize_t prev_i;
760 usize_t nearby; 763 usize_t nearby;
761 764
762 p = (mt_random (&static_mtrand) / (double)USIZE_T_MAX); 765 p = (mt_random (&static_mtrand) / (double)UINT32_MAX);
763 prev_i = mt_random (&static_mtrand) % offset; 766 prev_i = mt_random (&static_mtrand) % offset;
764 nearby = (mt_random (&static_mtrand) % 256) % offset; 767 nearby = (mt_random (&static_mtrand) % 256) % offset;
765 nearby = xd3_max (1U, nearby); 768 nearby = xd3_max (1U, nearby);
@@ -790,9 +793,13 @@ test_address_cache (xd3_stream *stream, int unused)
790 793
791 for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1) 794 for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1)
792 { 795 {
793 uint32_t addr; 796 usize_t addr;
794 797
795 if ((ret = xd3_decode_address (stream, offset, modes[offset], & buf, buf_max, & addr))) { return ret; } 798 if ((ret = xd3_decode_address (stream, offset, modes[offset],
799 & buf, buf_max, & addr)))
800 {
801 return ret;
802 }
796 803
797 if (addr != addrs[offset]) 804 if (addr != addrs[offset])
798 { 805 {
@@ -1418,7 +1425,7 @@ test_secondary (xd3_stream *stream, const xd3_sec_type *sec, usize_t groups)
1418 if ((ret = sec->encode (stream, enc_stream, 1425 if ((ret = sec->encode (stream, enc_stream,
1419 in_head, out_head, & cfg))) 1426 in_head, out_head, & cfg)))
1420 { 1427 {
1421 XPR(NT "test %u: encode: %s", test_i, stream->msg); 1428 XPR(NT "test %"W"u: encode: %s", test_i, stream->msg);
1422 goto fail; 1429 goto fail;
1423 } 1430 }
1424 1431
@@ -1457,7 +1464,7 @@ test_secondary (xd3_stream *stream, const xd3_sec_type *sec, usize_t groups)
1457 compress_size, dec_input, 1464 compress_size, dec_input,
1458 dec_correct, dec_output))) 1465 dec_correct, dec_output)))
1459 { 1466 {
1460 XPR(NT "test %u: decode: %s", test_i, stream->msg); 1467 XPR(NT "test %"W"u: decode: %s", test_i, stream->msg);
1461 goto fail; 1468 goto fail;
1462 } 1469 }
1463 1470
@@ -1577,6 +1584,45 @@ test_choose_instruction (xd3_stream *stream, int ignore)
1577 return 0; 1584 return 0;
1578} 1585}
1579 1586
1587static int
1588test_checksum_step (xd3_stream *stream, int ignore)
1589{
1590 const int bufsize = 128;
1591 uint8_t buf[bufsize];
1592 for (int i = 0; i < bufsize; i++)
1593 {
1594 buf[i] = mt_random (&static_mtrand) & 0xff;
1595 }
1596
1597 for (usize_t cksize = 4; cksize <= 32; cksize += 3)
1598 {
1599 xd3_hash_cfg h1;
1600 usize_t x;
1601 int ret;
1602
1603 if ((ret = xd3_size_hashtable (stream, XD3_ALLOCSIZE, cksize, &h1)) != 0)
1604 {
1605 return ret;
1606 }
1607
1608 x = xd3_large_cksum (&h1, buf, cksize);
1609 for (usize_t pos = 0; pos <= (bufsize - cksize); pos++)
1610 {
1611 usize_t y = xd3_large_cksum (&h1, buf + pos, cksize);
1612 if (x != y)
1613 {
1614 stream->msg = "checksum != incremental checksum";
1615 return XD3_INTERNAL;
1616 }
1617 x = xd3_large_cksum_update (&h1, x, buf + pos, cksize);
1618 }
1619
1620 xd3_free (stream, h1.powers);
1621 }
1622
1623 return 0;
1624}
1625
1580/*********************************************************************** 1626/***********************************************************************
1581 64BIT STREAMING 1627 64BIT STREAMING
1582 ***********************************************************************/ 1628 ***********************************************************************/
@@ -2714,14 +2760,14 @@ test_string_matching (xd3_stream *stream, int ignore)
2714 default: CHECK(0); 2760 default: CHECK(0);
2715 } 2761 }
2716 2762
2717 snprintf_func (rptr, rbuf+TESTBUFSIZE-rptr, "%d/%d", 2763 snprintf_func (rptr, rbuf+TESTBUFSIZE-rptr, "%"W"u/%"W"u",
2718 inst->pos, inst->size); 2764 inst->pos, inst->size);
2719 rptr += strlen (rptr); 2765 rptr += strlen (rptr);
2720 2766
2721 if (inst->type == XD3_CPY) 2767 if (inst->type == XD3_CPY)
2722 { 2768 {
2723 *rptr++ = '@'; 2769 *rptr++ = '@';
2724 snprintf_func (rptr, rbuf+TESTBUFSIZE-rptr, "%"Q"d", inst->addr); 2770 snprintf_func (rptr, rbuf+TESTBUFSIZE-rptr, "%"Q"u", inst->addr);
2725 rptr += strlen (rptr); 2771 rptr += strlen (rptr);
2726 } 2772 }
2727 2773
@@ -2737,7 +2783,7 @@ test_string_matching (xd3_stream *stream, int ignore)
2737 2783
2738 if (strcmp (rbuf, test->result) != 0) 2784 if (strcmp (rbuf, test->result) != 0)
2739 { 2785 {
2740 XPR(NT "test %u: expected %s: got %s", i, test->result, rbuf); 2786 XPR(NT "test %"W"u: expected %s: got %s", i, test->result, rbuf);
2741 stream->msg = "wrong result"; 2787 stream->msg = "wrong result";
2742 return XD3_INTERNAL; 2788 return XD3_INTERNAL;
2743 } 2789 }
@@ -2812,9 +2858,10 @@ test_iopt_flush_instructions (xd3_stream *stream, int ignore)
2812/* 2858/*
2813 * This tests the 32/64bit ambiguity for source-window matching. 2859 * This tests the 32/64bit ambiguity for source-window matching.
2814 */ 2860 */
2861#if !XD3_USE_LARGESIZET
2815static int 2862static int
2816test_source_cksum_offset (xd3_stream *stream, int ignore) 2863test_source_cksum_offset (xd3_stream *stream, int ignore)
2817{ 2864 {
2818 xd3_source source; 2865 xd3_source source;
2819 2866
2820 // Inputs are: 2867 // Inputs are:
@@ -2848,7 +2895,7 @@ test_source_cksum_offset (xd3_stream *stream, int ignore)
2848 stream->src = &source; 2895 stream->src = &source;
2849 2896
2850 for (test_ptr = cksum_test; test_ptr->cpos; test_ptr++) { 2897 for (test_ptr = cksum_test; test_ptr->cpos; test_ptr++) {
2851 xoff_t r; 2898 xoff_t r;
2852 stream->srcwin_cksum_pos = test_ptr->cpos; 2899 stream->srcwin_cksum_pos = test_ptr->cpos;
2853 stream->total_in = test_ptr->ipos; 2900 stream->total_in = test_ptr->ipos;
2854 2901
@@ -2857,6 +2904,7 @@ test_source_cksum_offset (xd3_stream *stream, int ignore)
2857 } 2904 }
2858 return 0; 2905 return 0;
2859} 2906}
2907#endif /* !XD3_USE_LARGESIZET */
2860 2908
2861static int 2909static int
2862test_in_memory (xd3_stream *stream, int ignore) 2910test_in_memory (xd3_stream *stream, int ignore)
@@ -2899,8 +2947,7 @@ test_in_memory (xd3_stream *stream, int ignore)
2899 TEST MAIN 2947 TEST MAIN
2900 ***********************************************************************/ 2948 ***********************************************************************/
2901 2949
2902static int 2950int xd3_selftest (void)
2903xd3_selftest (void)
2904{ 2951{
2905#define DO_TEST(fn,flags,arg) \ 2952#define DO_TEST(fn,flags,arg) \
2906 do { \ 2953 do { \
@@ -2928,8 +2975,8 @@ xd3_selftest (void)
2928 DO_TEST (encode_decode_uint32_t, 0, 0); 2975 DO_TEST (encode_decode_uint32_t, 0, 0);
2929 DO_TEST (encode_decode_uint64_t, 0, 0); 2976 DO_TEST (encode_decode_uint64_t, 0, 0);
2930 DO_TEST (usize_t_overflow, 0, 0); 2977 DO_TEST (usize_t_overflow, 0, 0);
2978 DO_TEST (checksum_step, 0, 0);
2931 DO_TEST (forward_match, 0, 0); 2979 DO_TEST (forward_match, 0, 0);
2932
2933 DO_TEST (address_cache, 0, 0); 2980 DO_TEST (address_cache, 0, 0);
2934 2981
2935 DO_TEST (string_matching, 0, 0); 2982 DO_TEST (string_matching, 0, 0);
@@ -2938,7 +2985,9 @@ xd3_selftest (void)
2938 DO_TEST (in_memory, 0, 0); 2985 DO_TEST (in_memory, 0, 0);
2939 2986
2940 DO_TEST (iopt_flush_instructions, 0, 0); 2987 DO_TEST (iopt_flush_instructions, 0, 0);
2988#if !XD3_USE_LARGESIZET
2941 DO_TEST (source_cksum_offset, 0, 0); 2989 DO_TEST (source_cksum_offset, 0, 0);
2990#endif
2942 2991
2943 DO_TEST (decompress_single_bit_error, 0, 3); 2992 DO_TEST (decompress_single_bit_error, 0, 3);
2944 DO_TEST (decompress_single_bit_error, XD3_ADLER32, 3); 2993 DO_TEST (decompress_single_bit_error, XD3_ADLER32, 3);
diff --git a/xdelta3/xdelta3.c b/xdelta3/xdelta3.c
index 976b219..9253d68 100644
--- a/xdelta3/xdelta3.c
+++ b/xdelta3/xdelta3.c
@@ -350,7 +350,9 @@ typedef unsigned int xd3_rtype;
350 350
351#include "xdelta3-list.h" 351#include "xdelta3-list.h"
352 352
353#if XD3_ENCODER
353XD3_MAKELIST(xd3_rlist, xd3_rinst, link); 354XD3_MAKELIST(xd3_rlist, xd3_rinst, link);
355#endif
354 356
355/***********************************************************************/ 357/***********************************************************************/
356 358
@@ -376,9 +378,6 @@ XD3_MAKELIST(xd3_rlist, xd3_rinst, link);
376#define ALPHABET_SIZE 256 /* Used in test code--size of the secondary 378#define ALPHABET_SIZE 256 /* Used in test code--size of the secondary
377 * compressor alphabet. */ 379 * compressor alphabet. */
378 380
379#define HASH_PERMUTE 1 /* The input is permuted by random nums */
380#define ADLER_LARGE_CKSUM 1 /* Adler checksum vs. RK checksum */
381
382#define HASH_CKOFFSET 1U /* Table entries distinguish "no-entry" from 381#define HASH_CKOFFSET 1U /* Table entries distinguish "no-entry" from
383 * offset 0 using this offset. */ 382 * offset 0 using this offset. */
384 383
@@ -476,9 +475,9 @@ static void xd3_free_output (xd3_stream *stream,
476 xd3_output *output); 475 xd3_output *output);
477 476
478static int xd3_emit_double (xd3_stream *stream, xd3_rinst *first, 477static int xd3_emit_double (xd3_stream *stream, xd3_rinst *first,
479 xd3_rinst *second, usize_t code); 478 xd3_rinst *second, uint8_t code);
480static int xd3_emit_single (xd3_stream *stream, xd3_rinst *single, 479static int xd3_emit_single (xd3_stream *stream, xd3_rinst *single,
481 usize_t code); 480 uint8_t code);
482 481
483static usize_t xd3_sizeof_output (xd3_output *output); 482static usize_t xd3_sizeof_output (xd3_output *output);
484static void xd3_encode_reset (xd3_stream *stream); 483static void xd3_encode_reset (xd3_stream *stream);
@@ -503,8 +502,6 @@ static int xd3_srcwin_move_point (xd3_stream *stream,
503 502
504static int xd3_emit_run (xd3_stream *stream, usize_t pos, 503static int xd3_emit_run (xd3_stream *stream, usize_t pos,
505 usize_t size, uint8_t *run_c); 504 usize_t size, uint8_t *run_c);
506static usize_t xd3_checksum_hash (const xd3_hash_cfg *cfg,
507 const usize_t cksum);
508static xoff_t xd3_source_cksum_offset(xd3_stream *stream, usize_t low); 505static xoff_t xd3_source_cksum_offset(xd3_stream *stream, usize_t low);
509static void xd3_scksum_insert (xd3_stream *stream, 506static void xd3_scksum_insert (xd3_stream *stream,
510 usize_t inx, 507 usize_t inx,
@@ -519,7 +516,7 @@ static void xd3_verify_run_state (xd3_stream *stream,
519 uint8_t *x_run_c); 516 uint8_t *x_run_c);
520static void xd3_verify_large_state (xd3_stream *stream, 517static void xd3_verify_large_state (xd3_stream *stream,
521 const uint8_t *inp, 518 const uint8_t *inp,
522 uint32_t x_cksum); 519 usize_t x_cksum);
523static void xd3_verify_small_state (xd3_stream *stream, 520static void xd3_verify_small_state (xd3_stream *stream,
524 const uint8_t *inp, 521 const uint8_t *inp,
525 uint32_t x_cksum); 522 uint32_t x_cksum);
@@ -533,15 +530,6 @@ static int xd3_decode_allocate (xd3_stream *stream, usize_t size,
533static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size); 530static void* xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size);
534static void xd3_free (xd3_stream *stream, void *ptr); 531static void xd3_free (xd3_stream *stream, void *ptr);
535 532
536static int xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp,
537 const uint8_t *max, uint32_t *valp);
538
539#if REGRESSION_TEST
540static int xd3_selftest (void);
541#endif
542
543/***********************************************************************/
544
545const char* xd3_strerror (int ret) 533const char* xd3_strerror (int ret)
546{ 534{
547 switch (ret) 535 switch (ret)
@@ -570,7 +558,7 @@ const char* xd3_strerror (int ret)
570 558
571struct _xd3_sec_type 559struct _xd3_sec_type
572{ 560{
573 int id; 561 uint8_t id;
574 const char *name; 562 const char *name;
575 xd3_secondary_flags flags; 563 xd3_secondary_flags flags;
576 564
@@ -602,7 +590,7 @@ struct _xd3_sec_type
602typedef struct _bit_state bit_state; 590typedef struct _bit_state bit_state;
603struct _bit_state 591struct _bit_state
604{ 592{
605 usize_t cur_byte; 593 uint8_t cur_byte;
606 usize_t cur_mask; 594 usize_t cur_mask;
607}; 595};
608 596
@@ -747,44 +735,6 @@ const xd3_sec_type lzma_sec_type =
747#endif /* __XDELTA3_C_HEADER_PASS__ */ 735#endif /* __XDELTA3_C_HEADER_PASS__ */
748#ifdef __XDELTA3_C_INLINE_PASS__ 736#ifdef __XDELTA3_C_INLINE_PASS__
749 737
750const uint16_t __single_hash[256] =
751{
752 /* Random numbers generated using SLIB's pseudo-random number generator.
753 * This hashes the input alphabet. */
754 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
755 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
756 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
757 0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
758 0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
759 0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
760 0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
761 0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
762 0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
763 0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
764 0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
765 0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
766 0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
767 0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
768 0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
769 0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
770 0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
771 0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
772 0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
773 0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
774 0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
775 0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
776 0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
777 0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
778 0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
779 0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
780 0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
781 0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
782 0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
783 0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
784 0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
785 0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
786};
787
788/**************************************************************** 738/****************************************************************
789 Instruction tables 739 Instruction tables
790 *****************************************************************/ 740 *****************************************************************/
@@ -841,24 +791,32 @@ struct _xd3_code_table_desc
841 /* Assumes a single RUN instruction */ 791 /* Assumes a single RUN instruction */
842 /* Assumes that MIN_MATCH is 4 */ 792 /* Assumes that MIN_MATCH is 4 */
843 793
844 uint8_t add_sizes; /* Number of immediate-size single adds (default 17) */ 794 uint8_t add_sizes; /* Number of immediate-size single
795 adds (default 17) */
845 uint8_t near_modes; /* Number of near copy modes (default 4) */ 796 uint8_t near_modes; /* Number of near copy modes (default 4) */
846 uint8_t same_modes; /* Number of same copy modes (default 3) */ 797 uint8_t same_modes; /* Number of same copy modes (default 3) */
847 uint8_t cpy_sizes; /* Number of immediate-size single copies (default 15) */ 798 uint8_t cpy_sizes; /* Number of immediate-size single
848 799 copies (default 15) */
849 uint8_t addcopy_add_max; /* Maximum add size for an add-copy double instruction, 800
850 all modes (default 4) */ 801 uint8_t addcopy_add_max; /* Maximum add size for an add-copy
851 uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy double instruction, 802 double instruction, all modes
852 up through VCD_NEAR modes (default 6) */ 803 (default 4) */
853 uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy double instruction, 804 uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy
854 VCD_SAME modes (default 4) */ 805 double instruction, up through
855 806 VCD_NEAR modes (default 6) */
856 uint8_t copyadd_add_max; /* Maximum add size for a copy-add double instruction, 807 uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy
857 all modes (default 1) */ 808 double instruction, VCD_SAME modes
858 uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add double instruction, 809 (default 4) */
859 up through VCD_NEAR modes (default 4) */ 810
860 uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add double instruction, 811 uint8_t copyadd_add_max; /* Maximum add size for a copy-add
861 VCD_SAME modes (default 4) */ 812 double instruction, all modes
813 (default 1) */
814 uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add
815 double instruction, up through
816 VCD_NEAR modes (default 4) */
817 uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add
818 double instruction, VCD_SAME modes
819 (default 4) */
862 820
863 xd3_code_table_sizes addcopy_max_sizes[MAX_MODES]; 821 xd3_code_table_sizes addcopy_max_sizes[MAX_MODES];
864 xd3_code_table_sizes copyadd_max_sizes[MAX_MODES]; 822 xd3_code_table_sizes copyadd_max_sizes[MAX_MODES];
@@ -880,17 +838,20 @@ static const xd3_code_table_desc __rfc3284_code_table_desc = {
880 4, /* copy-add max cpy, same */ 838 4, /* copy-add max cpy, same */
881 839
882 /* addcopy */ 840 /* addcopy */
883 { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},{4,235,1},{4,239,1},{4,243,1} }, 841 { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},
842 {4,235,1},{4,239,1},{4,243,1} },
884 /* copyadd */ 843 /* copyadd */
885 { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1},{4,253,1},{4,254,1},{4,255,1} }, 844 { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1},
845 {4,253,1},{4,254,1},{4,255,1} },
886}; 846};
887 847
888/* Computes code table entries of TBL using the specified description. */ 848/* Computes code table entries of TBL using the specified description. */
889static void 849static void
890xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl) 850xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl)
891{ 851{
892 usize_t size1, size2, mode; 852 uint8_t size1, size2;
893 usize_t cpy_modes = 2 + desc->near_modes + desc->same_modes; 853 uint8_t mode;
854 usize_t cpy_modes = 2U + desc->near_modes + desc->same_modes;
894 xd3_dinst *d = tbl; 855 xd3_dinst *d = tbl;
895 856
896 (d++)->type1 = XD3_RUN; 857 (d++)->type1 = XD3_RUN;
@@ -906,7 +867,8 @@ xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl)
906 { 867 {
907 (d++)->type1 = XD3_CPY + mode; 868 (d++)->type1 = XD3_CPY + mode;
908 869
909 for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes; size1 += 1, d += 1) 870 for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes;
871 size1 += 1, d += 1)
910 { 872 {
911 d->type1 = XD3_CPY + mode; 873 d->type1 = XD3_CPY + mode;
912 d->size1 = size1; 874 d->size1 = size1;
@@ -998,7 +960,7 @@ xd3_choose_instruction (xd3_rinst *prev, xd3_rinst *inst)
998 960
999 default: 961 default:
1000 { 962 {
1001 int mode = inst->type - XD3_CPY; 963 uint8_t mode = inst->type - XD3_CPY;
1002 964
1003 XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12); 965 XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12);
1004 966
@@ -1015,8 +977,9 @@ xd3_choose_instruction (xd3_rinst *prev, xd3_rinst *inst)
1015 if ( (inst->size <= 6) && 977 if ( (inst->size <= 6) &&
1016 (mode <= 5) ) 978 (mode <= 5) )
1017 { 979 {
1018 prev->code2 = 163 + (mode * 12) + (3 * (prev->size - 1)) + (inst->size - 4); 980 prev->code2 = (uint8_t)(163 + (mode * 12) +
1019 981 (3 * (prev->size - 1)) +
982 (inst->size - 4));
1020 XD3_ASSERT (prev->code2 <= 234); 983 XD3_ASSERT (prev->code2 <= 234);
1021 } 984 }
1022 else if ( (inst->size == 4) && 985 else if ( (inst->size == 4) &&
@@ -1077,10 +1040,10 @@ xd3_check_pow2 (xoff_t value, usize_t *logof)
1077 return XD3_INTERNAL; 1040 return XD3_INTERNAL;
1078} 1041}
1079 1042
1080size_t 1043usize_t
1081xd3_pow2_roundup (size_t x) 1044xd3_pow2_roundup (usize_t x)
1082{ 1045{
1083 size_t i = 1; 1046 usize_t i = 1;
1084 while (x > i) { 1047 while (x > i) {
1085 i <<= 1U; 1048 i <<= 1U;
1086 } 1049 }
@@ -1122,7 +1085,8 @@ xd3_round_blksize (usize_t sz, usize_t blksz)
1122 ***********************************************************************/ 1085 ***********************************************************************/
1123 1086
1124#define A32_BASE 65521L /* Largest prime smaller than 2^16 */ 1087#define A32_BASE 65521L /* Largest prime smaller than 2^16 */
1125#define A32_NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ 1088#define A32_NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2
1089 + (n+1)(BASE-1) <= 2^32-1 */
1126 1090
1127#define A32_DO1(buf,i) {s1 += buf[i]; s2 += s1;} 1091#define A32_DO1(buf,i) {s1 += buf[i]; s2 += s1;}
1128#define A32_DO2(buf,i) A32_DO1(buf,i); A32_DO1(buf,i+1); 1092#define A32_DO2(buf,i) A32_DO1(buf,i); A32_DO1(buf,i+1);
@@ -1130,11 +1094,10 @@ xd3_round_blksize (usize_t sz, usize_t blksz)
1130#define A32_DO8(buf,i) A32_DO4(buf,i); A32_DO4(buf,i+4); 1094#define A32_DO8(buf,i) A32_DO4(buf,i); A32_DO4(buf,i+4);
1131#define A32_DO16(buf) A32_DO8(buf,0); A32_DO8(buf,8); 1095#define A32_DO16(buf) A32_DO8(buf,0); A32_DO8(buf,8);
1132 1096
1133static unsigned long adler32 (unsigned long adler, const uint8_t *buf, 1097static uint32_t adler32 (uint32_t adler, const uint8_t *buf, usize_t len)
1134 usize_t len)
1135{ 1098{
1136 unsigned long s1 = adler & 0xffff; 1099 uint32_t s1 = adler & 0xffffU;
1137 unsigned long s2 = (adler >> 16) & 0xffff; 1100 uint32_t s2 = (adler >> 16) & 0xffffU;
1138 int k; 1101 int k;
1139 1102
1140 while (len > 0) 1103 while (len > 0)
@@ -1329,7 +1292,8 @@ xd3_encode_address (xd3_stream *stream,
1329 uint8_t* mode) 1292 uint8_t* mode)
1330{ 1293{
1331 usize_t d, bestd; 1294 usize_t d, bestd;
1332 usize_t i, bestm, ret; 1295 usize_t i, bestm;
1296 int ret;
1333 xd3_addr_cache* acache = & stream->acache; 1297 xd3_addr_cache* acache = & stream->acache;
1334 1298
1335#define SMALLEST_INT(x) do { if (((x) & ~127U) == 0) { goto good; } } while (0) 1299#define SMALLEST_INT(x) do { if (((x) & ~127U) == 0) { goto good; } } while (0)
@@ -1403,7 +1367,7 @@ xd3_encode_address (xd3_stream *stream,
1403static int 1367static int
1404xd3_decode_address (xd3_stream *stream, usize_t here, 1368xd3_decode_address (xd3_stream *stream, usize_t here,
1405 usize_t mode, const uint8_t **inpp, 1369 usize_t mode, const uint8_t **inpp,
1406 const uint8_t *max, uint32_t *valp) 1370 const uint8_t *max, usize_t *valp)
1407{ 1371{
1408 int ret; 1372 int ret;
1409 usize_t same_start = 2 + stream->acache.s_near; 1373 usize_t same_start = 2 + stream->acache.s_near;
@@ -1620,11 +1584,13 @@ xd3_free_stream (xd3_stream *stream)
1620 xd3_free (stream, tmp); 1584 xd3_free (stream, tmp);
1621 } 1585 }
1622 1586
1587#if XD3_ENCODER
1623 xd3_free (stream, stream->large_table); 1588 xd3_free (stream, stream->large_table);
1624 xd3_free (stream, stream->small_table); 1589 xd3_free (stream, stream->small_table);
1590 xd3_free (stream, stream->large_hash.powers);
1591 xd3_free (stream, stream->small_hash.powers);
1625 xd3_free (stream, stream->small_prev); 1592 xd3_free (stream, stream->small_prev);
1626 1593
1627#if XD3_ENCODER
1628 { 1594 {
1629 int i; 1595 int i;
1630 for (i = 0; i < ENC_SECTS; i += 1) 1596 for (i = 0; i < ENC_SECTS; i += 1)
@@ -1993,7 +1959,7 @@ xd3_set_source (xd3_stream *stream,
1993 } 1959 }
1994 1960
1995 src->shiftby = shiftby; 1961 src->shiftby = shiftby;
1996 src->maskby = (1 << shiftby) - 1; 1962 src->maskby = (1ULL << shiftby) - 1ULL;
1997 1963
1998 if (xd3_check_pow2 (src->max_winsize, NULL) != 0) 1964 if (xd3_check_pow2 (src->max_winsize, NULL) != 0)
1999 { 1965 {
@@ -2012,7 +1978,8 @@ xd3_set_source_and_size (xd3_stream *stream,
2012 if (ret == 0) 1978 if (ret == 0)
2013 { 1979 {
2014 stream->src->eof_known = 1; 1980 stream->src->eof_known = 1;
2015 1981 IF_DEBUG2 (DP(RINT "[set source] size known %"Q"\n",
1982 source_size));
2016 xd3_blksize_div(source_size, 1983 xd3_blksize_div(source_size,
2017 stream->src, 1984 stream->src,
2018 &stream->src->max_blkno, 1985 &stream->src->max_blkno,
@@ -2192,22 +2159,22 @@ xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
2192 XD3_ASSERT (inst->addr >= src->srcbase); 2159 XD3_ASSERT (inst->addr >= src->srcbase);
2193 XD3_ASSERT (inst->addr + inst->size <= 2160 XD3_ASSERT (inst->addr + inst->size <=
2194 src->srcbase + src->srclen); 2161 src->srcbase + src->srclen);
2195 addr = (usize_t)(inst->addr - src->srcbase); 2162 addr = inst->addr - src->srcbase;
2196 stream->n_scpy += 1; 2163 stream->n_scpy += 1;
2197 stream->l_scpy += (xoff_t) inst->size; 2164 stream->l_scpy += inst->size;
2198 } 2165 }
2199 else 2166 else
2200 { 2167 {
2201 /* with source window: target copy address is offset 2168 /* with source window: target copy address is offset
2202 * by taroff. */ 2169 * by taroff. */
2203 addr = stream->taroff + (usize_t) inst->addr; 2170 addr = stream->taroff + inst->addr;
2204 stream->n_tcpy += 1; 2171 stream->n_tcpy += 1;
2205 stream->l_tcpy += (xoff_t) inst->size; 2172 stream->l_tcpy += inst->size;
2206 } 2173 }
2207 } 2174 }
2208 else 2175 else
2209 { 2176 {
2210 addr = (usize_t) inst->addr; 2177 addr = inst->addr;
2211 stream->n_tcpy += 1; 2178 stream->n_tcpy += 1;
2212 stream->l_tcpy += inst->size; 2179 stream->l_tcpy += inst->size;
2213 } 2180 }
@@ -2223,7 +2190,7 @@ xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
2223 2190
2224 IF_DEBUG2 ({ 2191 IF_DEBUG2 ({
2225 static int cnt; 2192 static int cnt;
2226 DP(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %u\n", 2193 DP(RINT "[iopt copy:%d] pos %"Q"-%"Q" addr %"Q"-%"Q" size %u\n",
2227 cnt++, 2194 cnt++,
2228 stream->total_in + inst->pos, 2195 stream->total_in + inst->pos,
2229 stream->total_in + inst->pos + inst->size, 2196 stream->total_in + inst->pos + inst->size,
@@ -2240,7 +2207,7 @@ xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
2240 2207
2241 IF_DEBUG2 ({ 2208 IF_DEBUG2 ({
2242 static int cnt; 2209 static int cnt;
2243 DP(RINT "[iopt run:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size); 2210 DP(RINT "[iopt run:%d] pos %"Q" size %u\n", cnt++, stream->total_in + inst->pos, inst->size);
2244 }); 2211 });
2245 break; 2212 break;
2246 } 2213 }
@@ -2254,7 +2221,7 @@ xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
2254 2221
2255 IF_DEBUG2 ({ 2222 IF_DEBUG2 ({
2256 static int cnt; 2223 static int cnt;
2257 DP(RINT "[iopt add:%d] pos %"Q"u size %u\n", cnt++, stream->total_in + inst->pos, inst->size); 2224 DP(RINT "[iopt add:%d] pos %"Q" size %u\n", cnt++, stream->total_in + inst->pos, inst->size);
2258 }); 2225 });
2259 2226
2260 break; 2227 break;
@@ -2273,7 +2240,8 @@ xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
2273 { 2240 {
2274 if (stream->iout->code2 != 0) 2241 if (stream->iout->code2 != 0)
2275 { 2242 {
2276 if ((ret = xd3_emit_double (stream, stream->iout, inst, stream->iout->code2))) { return ret; } 2243 if ((ret = xd3_emit_double (stream, stream->iout, inst,
2244 stream->iout->code2))) { return ret; }
2277 2245
2278 xd3_iopt_free_nonadd (stream, stream->iout); 2246 xd3_iopt_free_nonadd (stream, stream->iout);
2279 xd3_iopt_free_nonadd (stream, inst); 2247 xd3_iopt_free_nonadd (stream, inst);
@@ -2630,7 +2598,7 @@ xd3_iopt_last_matched (xd3_stream *stream)
2630 ***********************************************************/ 2598 ***********************************************************/
2631 2599
2632static int 2600static int
2633xd3_emit_single (xd3_stream *stream, xd3_rinst *single, usize_t code) 2601xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint8_t code)
2634{ 2602{
2635 int has_size = stream->code_table[code].size1 == 0; 2603 int has_size = stream->code_table[code].size1 == 0;
2636 int ret; 2604 int ret;
@@ -2659,7 +2627,7 @@ xd3_emit_single (xd3_stream *stream, xd3_rinst *single, usize_t code)
2659 2627
2660static int 2628static int
2661xd3_emit_double (xd3_stream *stream, xd3_rinst *first, 2629xd3_emit_double (xd3_stream *stream, xd3_rinst *first,
2662 xd3_rinst *second, usize_t code) 2630 xd3_rinst *second, uint8_t code)
2663{ 2631{
2664 int ret; 2632 int ret;
2665 2633
@@ -2729,8 +2697,8 @@ xd3_emit_hdr (xd3_stream *stream)
2729 int use_secondary = stream->sec_type != NULL; 2697 int use_secondary = stream->sec_type != NULL;
2730 int use_adler32 = stream->flags & (XD3_ADLER32 | XD3_ADLER32_RECODE); 2698 int use_adler32 = stream->flags & (XD3_ADLER32 | XD3_ADLER32_RECODE);
2731 int vcd_source = xd3_encoder_used_source (stream); 2699 int vcd_source = xd3_encoder_used_source (stream);
2732 usize_t win_ind = 0; 2700 uint8_t win_ind = 0;
2733 usize_t del_ind = 0; 2701 uint8_t del_ind = 0;
2734 usize_t enc_len; 2702 usize_t enc_len;
2735 usize_t tgt_len; 2703 usize_t tgt_len;
2736 usize_t data_len; 2704 usize_t data_len;
@@ -2739,7 +2707,7 @@ xd3_emit_hdr (xd3_stream *stream)
2739 2707
2740 if (stream->current_window == 0) 2708 if (stream->current_window == 0)
2741 { 2709 {
2742 usize_t hdr_ind = 0; 2710 uint8_t hdr_ind = 0;
2743 int use_appheader = stream->enc_appheader != NULL; 2711 int use_appheader = stream->enc_appheader != NULL;
2744 2712
2745 if (use_secondary) { hdr_ind |= VCD_SECONDARY; } 2713 if (use_secondary) { hdr_ind |= VCD_SECONDARY; }
@@ -2837,7 +2805,7 @@ xd3_emit_hdr (xd3_stream *stream)
2837 inst_len = xd3_sizeof_output (INST_HEAD (stream)); 2805 inst_len = xd3_sizeof_output (INST_HEAD (stream));
2838 addr_len = xd3_sizeof_output (ADDR_HEAD (stream)); 2806 addr_len = xd3_sizeof_output (ADDR_HEAD (stream));
2839 2807
2840 /* The enc_len field is a redundency for future extensions.*/ 2808 /* The enc_len field is a redundency for future extensions. */
2841 enc_len = (1 + (xd3_sizeof_size (tgt_len) + 2809 enc_len = (1 + (xd3_sizeof_size (tgt_len) +
2842 xd3_sizeof_size (data_len) + 2810 xd3_sizeof_size (data_len) +
2843 xd3_sizeof_size (inst_len) + 2811 xd3_sizeof_size (inst_len) +
@@ -2978,6 +2946,7 @@ xd3_alloc_iopt (xd3_stream *stream, usize_t elts)
2978static int 2946static int
2979xd3_encode_init (xd3_stream *stream, int full_init) 2947xd3_encode_init (xd3_stream *stream, int full_init)
2980{ 2948{
2949 int ret;
2981 int i; 2950 int i;
2982 2951
2983 if (full_init) 2952 if (full_init)
@@ -2990,12 +2959,17 @@ xd3_encode_init (xd3_stream *stream, int full_init)
2990 * identical or short inputs require no table allocation. */ 2959 * identical or short inputs require no table allocation. */
2991 if (large_comp) 2960 if (large_comp)
2992 { 2961 {
2962 /* TODO Need to check for overflow here. */
2993 usize_t hash_values = stream->src->max_winsize / 2963 usize_t hash_values = stream->src->max_winsize /
2994 stream->smatcher.large_step; 2964 stream->smatcher.large_step;
2995 2965
2996 xd3_size_hashtable (stream, 2966 if ((ret = xd3_size_hashtable (stream,
2997 hash_values, 2967 hash_values,
2998 & stream->large_hash); 2968 stream->smatcher.large_look,
2969 & stream->large_hash)))
2970 {
2971 return ret;
2972 }
2999 } 2973 }
3000 2974
3001 if (small_comp) 2975 if (small_comp)
@@ -3005,9 +2979,13 @@ xd3_encode_init (xd3_stream *stream, int full_init)
3005 * also sort of makes sense. @@@ */ 2979 * also sort of makes sense. @@@ */
3006 usize_t hash_values = stream->winsize; 2980 usize_t hash_values = stream->winsize;
3007 2981
3008 xd3_size_hashtable (stream, 2982 if ((ret = xd3_size_hashtable (stream,
3009 hash_values, 2983 hash_values,
3010 & stream->small_hash); 2984 stream->smatcher.small_look,
2985 & stream->small_hash)))
2986 {
2987 return ret;
2988 }
3011 } 2989 }
3012 } 2990 }
3013 2991
@@ -3156,7 +3134,7 @@ xd3_encode_input (xd3_stream *stream)
3156 3134
3157 stream->enc_state = ENC_SEARCH; 3135 stream->enc_state = ENC_SEARCH;
3158 3136
3159 IF_DEBUG2 (DP(RINT "[WINSTART:%"Q"u] input bytes %u offset %"Q"u\n", 3137 IF_DEBUG2 (DP(RINT "[WINSTART:%"Q"] input bytes %u offset %"Q"\n",
3160 stream->current_window, stream->avail_in, 3138 stream->current_window, stream->avail_in,
3161 stream->total_in)); 3139 stream->total_in));
3162 return XD3_WINSTART; 3140 return XD3_WINSTART;
@@ -3270,7 +3248,7 @@ xd3_encode_input (xd3_stream *stream)
3270 stream->enc_state = ENC_POSTOUT; 3248 stream->enc_state = ENC_POSTOUT;
3271 stream->next_out = stream->enc_current->base; 3249 stream->next_out = stream->enc_current->base;
3272 stream->avail_out = stream->enc_current->next; 3250 stream->avail_out = stream->enc_current->next;
3273 stream->total_out += (xoff_t) stream->avail_out; 3251 stream->total_out += stream->avail_out;
3274 3252
3275 /* If there is any output in this buffer, return it, otherwise 3253 /* If there is any output in this buffer, return it, otherwise
3276 * fall through to handle the next buffer or finish the window 3254 * fall through to handle the next buffer or finish the window
@@ -3295,10 +3273,10 @@ xd3_encode_input (xd3_stream *stream)
3295 goto enc_output; 3273 goto enc_output;
3296 } 3274 }
3297 3275
3298 stream->total_in += (xoff_t) stream->avail_in; 3276 stream->total_in += stream->avail_in;
3299 stream->enc_state = ENC_POSTWIN; 3277 stream->enc_state = ENC_POSTWIN;
3300 3278
3301 IF_DEBUG2 (DP(RINT "[WINFINISH:%"Q"u] in=%"Q"u\n", 3279 IF_DEBUG2 (DP(RINT "[WINFINISH:%"Q"] in=%"Q"\n",
3302 stream->current_window, 3280 stream->current_window,
3303 stream->total_in)); 3281 stream->total_in));
3304 return XD3_WINFINISH; 3282 return XD3_WINFINISH;
@@ -3607,7 +3585,7 @@ xd3_string_match_init (xd3_stream *stream)
3607 return 0; 3585 return 0;
3608} 3586}
3609 3587
3610#if XD3_USE_LARGEFILE64 3588#if XD3_USE_LARGEFILE64 && !XD3_USE_LARGESIZET
3611/* This function handles the 32/64bit ambiguity -- file positions are 64bit 3589/* This function handles the 32/64bit ambiguity -- file positions are 64bit
3612 * but the hash table for source-offsets is 32bit. */ 3590 * but the hash table for source-offsets is 32bit. */
3613static xoff_t 3591static xoff_t
@@ -3634,7 +3612,7 @@ xd3_source_cksum_offset(xd3_stream *stream, usize_t low)
3634static xoff_t 3612static xoff_t
3635xd3_source_cksum_offset(xd3_stream *stream, usize_t low) 3613xd3_source_cksum_offset(xd3_stream *stream, usize_t low)
3636{ 3614{
3637 return (xoff_t) low; 3615 return low;
3638} 3616}
3639#endif 3617#endif
3640 3618
@@ -3668,7 +3646,7 @@ xd3_srcwin_setup (xd3_stream *stream)
3668 * use smaller windows. */ 3646 * use smaller windows. */
3669 length = stream->match_maxaddr - stream->match_minaddr; 3647 length = stream->match_maxaddr - stream->match_minaddr;
3670 3648
3671 x = (xoff_t) USIZE_T_MAX; 3649 x = USIZE_T_MAX;
3672 if (length > x) 3650 if (length > x)
3673 { 3651 {
3674 stream->msg = "source window length overflow (not 64bit)"; 3652 stream->msg = "source window length overflow (not 64bit)";
@@ -3687,21 +3665,20 @@ xd3_srcwin_setup (xd3_stream *stream)
3687 3665
3688 /* Otherwise, we have to make a guess. More copies may still be 3666 /* Otherwise, we have to make a guess. More copies may still be
3689 * issued, but we have to decide the source window base and length 3667 * issued, but we have to decide the source window base and length
3690 * now. */ 3668 * now.
3691 /* TODO: This may not working well in practice, more testing needed. */ 3669 * TODO: This may not working well in practice, more testing needed. */
3692 src->srcbase = stream->match_minaddr; 3670 src->srcbase = stream->match_minaddr;
3693 src->srclen = xd3_max ((usize_t) length, 3671 src->srclen = xd3_max ((usize_t) length,
3694 stream->avail_in + (stream->avail_in >> 2)); 3672 stream->avail_in + (stream->avail_in >> 2));
3695 3673
3696 if (src->eof_known) 3674 if (src->eof_known)
3697 { 3675 {
3698 /* Note: if the source size is known, we must reduce srclen or 3676 /* Note: if the source size is known, we must reduce srclen or
3699 * code that expects to pass a single block w/ getblk == NULL 3677 * code that expects to pass a single block w/ getblk == NULL
3700 * will not function, as the code will return GETSRCBLK asking 3678 * will not function, as the code will return GETSRCBLK asking
3701 * for the second block. */ 3679 * for the second block. */
3702 src->srclen = xd3_min (src->srclen, xd3_source_eof(src) - src->srcbase); 3680 src->srclen = xd3_min (src->srclen, xd3_source_eof(src) - src->srcbase);
3703 } 3681 }
3704
3705 IF_DEBUG1 (DP(RINT "[srcwin_setup_constrained] base %"Q"u len %u\n", 3682 IF_DEBUG1 (DP(RINT "[srcwin_setup_constrained] base %"Q"u len %u\n",
3706 src->srcbase, src->srclen)); 3683 src->srcbase, src->srclen));
3707 3684
@@ -3768,11 +3745,12 @@ xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
3768 3745
3769 /* Going backwards, the 1.5-pass algorithm allows some 3746 /* Going backwards, the 1.5-pass algorithm allows some
3770 * already-matched input may be covered by a longer source match. 3747 * already-matched input may be covered by a longer source match.
3771 * The greedy algorithm does not allow this. */ 3748 * The greedy algorithm does not allow this.
3749 * TODO: Measure this. */
3772 if (stream->flags & XD3_BEGREEDY) 3750 if (stream->flags & XD3_BEGREEDY)
3773 { 3751 {
3774 /* The greedy algorithm allows backward matching to the last 3752 /* The greedy algorithm allows backward matching to the last
3775 matched position. */ 3753 * matched position. */
3776 greedy_or_not = xd3_iopt_last_matched (stream); 3754 greedy_or_not = xd3_iopt_last_matched (stream);
3777 } 3755 }
3778 else 3756 else
@@ -3801,7 +3779,8 @@ xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
3801 * 0--src->size. We compare the usize_t 3779 * 0--src->size. We compare the usize_t
3802 * match_maxfwd/match_maxback against the xoff_t 3780 * match_maxfwd/match_maxback against the xoff_t
3803 * src->size/srcpos values and take the min. */ 3781 * src->size/srcpos values and take the min. */
3804 if (srcpos < (xoff_t) stream->match_maxback) 3782 /* TODO #if XD3_USE_LARGESIZET ? */
3783 if (srcpos < stream->match_maxback)
3805 { 3784 {
3806 stream->match_maxback = (usize_t) srcpos; 3785 stream->match_maxback = (usize_t) srcpos;
3807 } 3786 }
@@ -3810,14 +3789,14 @@ xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
3810 { 3789 {
3811 xoff_t srcavail = xd3_source_eof (src) - srcpos; 3790 xoff_t srcavail = xd3_source_eof (src) - srcpos;
3812 3791
3813 if (srcavail < (xoff_t) stream->match_maxfwd) 3792 if (srcavail < stream->match_maxfwd)
3814 { 3793 {
3815 stream->match_maxfwd = (usize_t) srcavail; 3794 stream->match_maxfwd = (usize_t) srcavail;
3816 } 3795 }
3817 } 3796 }
3818 3797
3819 IF_DEBUG2(DP(RINT 3798 IF_DEBUG2(DP(RINT
3820 "[match_setup] srcpos %"Q"u (tgtpos %"Q"u) " 3799 "[match_setup] srcpos %"Q" (tgtpos %"Q") "
3821 "unrestricted maxback %u maxfwd %u\n", 3800 "unrestricted maxback %u maxfwd %u\n",
3822 srcpos, 3801 srcpos,
3823 stream->total_in + stream->input_position, 3802 stream->total_in + stream->input_position,
@@ -3831,7 +3810,7 @@ xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
3831 3810
3832 /* Restricted case: fail if the srcpos lies outside the source window */ 3811 /* Restricted case: fail if the srcpos lies outside the source window */
3833 if ((srcpos < src->srcbase) || 3812 if ((srcpos < src->srcbase) ||
3834 (srcpos > (src->srcbase + (xoff_t) src->srclen))) 3813 (srcpos > (src->srcbase + src->srclen)))
3835 { 3814 {
3836 IF_DEBUG1(DP(RINT "[match_setup] restricted source window failure\n")); 3815 IF_DEBUG1(DP(RINT "[match_setup] restricted source window failure\n"));
3837 goto bad; 3816 goto bad;
@@ -3846,7 +3825,7 @@ xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
3846 stream->match_maxback = srcavail; 3825 stream->match_maxback = srcavail;
3847 } 3826 }
3848 3827
3849 srcavail = (usize_t) (src->srcbase + (xoff_t) src->srclen - srcpos); 3828 srcavail = src->srcbase + src->srclen - srcpos;
3850 if (srcavail < stream->match_maxfwd) 3829 if (srcavail < stream->match_maxfwd)
3851 { 3830 {
3852 stream->match_maxfwd = srcavail; 3831 stream->match_maxfwd = srcavail;
@@ -3874,19 +3853,19 @@ xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
3874 return 1; 3853 return 1;
3875} 3854}
3876 3855
3877static inline int 3856static inline usize_t
3878xd3_forward_match(const uint8_t *s1c, const uint8_t *s2c, int n) 3857xd3_forward_match(const uint8_t *s1c, const uint8_t *s2c, usize_t n)
3879{ 3858{
3880 int i = 0; 3859 usize_t i = 0;
3881#if UNALIGNED_OK 3860#if UNALIGNED_OK
3882 int nint = n / sizeof(int); 3861 usize_t nint = n / sizeof(int);
3883 3862
3884 if (nint >> 3) 3863 if (nint >> 3)
3885 { 3864 {
3886 int j = 0; 3865 usize_t j = 0;
3887 const int *s1 = (const int*)s1c; 3866 const int *s1 = (const int*)s1c;
3888 const int *s2 = (const int*)s2c; 3867 const int *s2 = (const int*)s2c;
3889 int nint_8 = nint - 8; 3868 usize_t nint_8 = nint - 8;
3890 3869
3891 while (i <= nint_8 && 3870 while (i <= nint_8 &&
3892 s1[i++] == s2[j++] && 3871 s1[i++] == s2[j++] &&
@@ -3933,6 +3912,9 @@ xd3_source_extend_match (xd3_stream *stream)
3933 usize_t tryrem; /* tryrem is the number of matchable bytes */ 3912 usize_t tryrem; /* tryrem is the number of matchable bytes */
3934 usize_t matched; 3913 usize_t matched;
3935 3914
3915 IF_DEBUG2(DP(RINT "[extend match] srcpos %"Q"\n",
3916 stream->match_srcpos));
3917
3936 XD3_ASSERT (src != NULL); 3918 XD3_ASSERT (src != NULL);
3937 3919
3938 /* Does it make sense to compute backward match AFTER forward match? */ 3920 /* Does it make sense to compute backward match AFTER forward match? */
@@ -3980,7 +3962,7 @@ xd3_source_extend_match (xd3_stream *stream)
3980 3962
3981 tryrem = xd3_min (tryoff, stream->match_maxback - stream->match_back); 3963 tryrem = xd3_min (tryoff, stream->match_maxback - stream->match_back);
3982 3964
3983 IF_DEBUG2(DP(RINT "[maxback] maxback %u trysrc %"Q"u/%u tgt %u tryrem %u\n", 3965 IF_DEBUG2(DP(RINT "[maxback] maxback %u trysrc %"Q"/%u tgt %u tryrem %u\n",
3984 stream->match_maxback, tryblk, tryoff, streamoff, tryrem)); 3966 stream->match_maxback, tryblk, tryoff, streamoff, tryrem));
3985 3967
3986 /* TODO: This code can be optimized similar to xd3_match_forward() */ 3968 /* TODO: This code can be optimized similar to xd3_match_forward() */
@@ -4321,7 +4303,7 @@ xd3_smatch (xd3_stream *stream,
4321static void 4303static void
4322xd3_verify_small_state (xd3_stream *stream, 4304xd3_verify_small_state (xd3_stream *stream,
4323 const uint8_t *inp, 4305 const uint8_t *inp,
4324 uint32_t x_cksum) 4306 uint32_t x_cksum)
4325{ 4307{
4326 uint32_t state; 4308 uint32_t state;
4327 uint32_t cksum = xd3_scksum (&state, inp, stream->smatcher.small_look); 4309 uint32_t cksum = xd3_scksum (&state, inp, stream->smatcher.small_look);
@@ -4332,9 +4314,9 @@ xd3_verify_small_state (xd3_stream *stream,
4332static void 4314static void
4333xd3_verify_large_state (xd3_stream *stream, 4315xd3_verify_large_state (xd3_stream *stream,
4334 const uint8_t *inp, 4316 const uint8_t *inp,
4335 uint32_t x_cksum) 4317 usize_t x_cksum)
4336{ 4318{
4337 uint32_t cksum = xd3_lcksum (inp, stream->smatcher.large_look); 4319 usize_t cksum = xd3_large_cksum (&stream->large_hash, inp, stream->smatcher.large_look);
4338 XD3_ASSERT (cksum == x_cksum); 4320 XD3_ASSERT (cksum == x_cksum);
4339} 4321}
4340static void 4322static void
@@ -4478,8 +4460,12 @@ xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
4478 4460
4479 do 4461 do
4480 { 4462 {
4481 uint32_t cksum = xd3_lcksum (stream->src->curblk + blkpos, 4463 /* TODO: This would be significantly faster if the compiler
4482 stream->smatcher.large_look); 4464 * knew stream->smatcher.large_look (which the template for
4465 * xd3_string_match_* allows). */
4466 usize_t cksum = xd3_large_cksum (&stream->large_hash,
4467 stream->src->curblk + blkpos,
4468 stream->smatcher.large_look);
4483 usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum); 4469 usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum);
4484 4470
4485 stream->large_table[hval] = 4471 stream->large_table[hval] =
@@ -4495,6 +4481,15 @@ xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
4495 stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize; 4481 stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize;
4496 } 4482 }
4497 4483
4484 IF_DEBUG1 (DP(RINT
4485 "[srcwin_move_point] exited loop T=%"Q"{%"Q"} "
4486 "S=%"Q" EOF=%"Q" %s\n",
4487 stream->total_in + stream->input_position,
4488 logical_input_cksum_pos,
4489 stream->srcwin_cksum_pos,
4490 xd3_source_eof (stream->src),
4491 stream->src->eof_known ? "known" : "unknown"));
4492
4498 if (stream->src->eof_known) 4493 if (stream->src->eof_known)
4499 { 4494 {
4500 xoff_t source_size = xd3_source_eof (stream->src); 4495 xoff_t source_size = xd3_source_eof (stream->src);
@@ -4574,7 +4569,7 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
4574 const uint8_t *inp; 4569 const uint8_t *inp;
4575 uint32_t scksum = 0; 4570 uint32_t scksum = 0;
4576 uint32_t scksum_state = 0; 4571 uint32_t scksum_state = 0;
4577 uint32_t lcksum = 0; 4572 usize_t lcksum = 0;
4578 usize_t sinx; 4573 usize_t sinx;
4579 usize_t linx; 4574 usize_t linx;
4580 uint8_t run_c; 4575 uint8_t run_c;
@@ -4650,7 +4645,7 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
4650 return ret; 4645 return ret;
4651 } 4646 }
4652 4647
4653 lcksum = xd3_lcksum (inp, LLOOK); 4648 lcksum = xd3_large_cksum (&stream->large_hash, inp, LLOOK);
4654 } 4649 }
4655 4650
4656 /* TRYLAZYLEN: True if a certain length match should be followed by 4651 /* TRYLAZYLEN: True if a certain length match should be followed by
@@ -4826,7 +4821,7 @@ XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
4826 4821
4827 if (DO_LARGE && (stream->input_position + LLOOK < stream->avail_in)) 4822 if (DO_LARGE && (stream->input_position + LLOOK < stream->avail_in))
4828 { 4823 {
4829 lcksum = xd3_large_cksum_update (lcksum, inp, LLOOK); 4824 lcksum = xd3_large_cksum_update (&stream->large_hash, lcksum, inp, LLOOK);
4830 } 4825 }
4831 } 4826 }
4832 4827
diff --git a/xdelta3/xdelta3.h b/xdelta3/xdelta3.h
index 6cc0f9f..1b134eb 100644
--- a/xdelta3/xdelta3.h
+++ b/xdelta3/xdelta3.h
@@ -17,7 +17,7 @@
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20/* To know more about Xdelta, start by reading xdelta3.c. If you are 20/* To learn more about Xdelta, start by reading xdelta3.c. If you are
21 * ready to use the API, continue reading here. There are two 21 * ready to use the API, continue reading here. There are two
22 * interfaces -- xd3_encode_input and xd3_decode_input -- plus a dozen 22 * interfaces -- xd3_encode_input and xd3_decode_input -- plus a dozen
23 * or so related calls. This interface is styled after Zlib. */ 23 * or so related calls. This interface is styled after Zlib. */
@@ -25,7 +25,7 @@
25#ifndef _XDELTA3_H_ 25#ifndef _XDELTA3_H_
26#define _XDELTA3_H_ 26#define _XDELTA3_H_
27 27
28#define _POSIX_SOURCE 28#define _POSIX_SOURCE 200112L
29#define _ISOC99_SOURCE 29#define _ISOC99_SOURCE
30#define _C99_SOURCE 30#define _C99_SOURCE
31 31
@@ -71,7 +71,7 @@
71 * 71 *
72 * 8-16MB is reasonable, probably don't need to go larger. */ 72 * 8-16MB is reasonable, probably don't need to go larger. */
73#ifndef XD3_HARDMAXWINSIZE 73#ifndef XD3_HARDMAXWINSIZE
74#define XD3_HARDMAXWINSIZE (1U<<24) 74#define XD3_HARDMAXWINSIZE (1U<<26)
75#endif 75#endif
76/* The IOPT_SIZE value sets the size of a buffer used to batch 76/* The IOPT_SIZE value sets the size of a buffer used to batch
77 * overlapping copy instructions before they are optimized by picking 77 * overlapping copy instructions before they are optimized by picking
@@ -87,8 +87,7 @@
87#define XD3_DEFAULT_SPREVSZ (1U<<18) 87#define XD3_DEFAULT_SPREVSZ (1U<<18)
88#endif 88#endif
89 89
90/* The default compression level 90/* The default compression level */
91 */
92#ifndef XD3_DEFAULT_LEVEL 91#ifndef XD3_DEFAULT_LEVEL
93#define XD3_DEFAULT_LEVEL 3 92#define XD3_DEFAULT_LEVEL 3
94#endif 93#endif
@@ -101,6 +100,12 @@
101#define XD3_USE_LARGEFILE64 1 100#define XD3_USE_LARGEFILE64 1
102#endif 101#endif
103 102
103/* The source window size is limited to 2GB unless
104 * XD3_USE_LARGESIZET is defined to 1. */
105#ifndef XD3_USE_LARGESIZET
106#define XD3_USE_LARGESIZET 1
107#endif
108
104/* Sizes and addresses within VCDIFF windows are represented as usize_t 109/* Sizes and addresses within VCDIFF windows are represented as usize_t
105 * 110 *
106 * For source-file offsets and total file sizes, total input and 111 * For source-file offsets and total file sizes, total input and
@@ -109,19 +114,19 @@
109 * the 32bit boundary [xdelta3-test.h]). 114 * the 32bit boundary [xdelta3-test.h]).
110 */ 115 */
111#ifndef _WIN32 116#ifndef _WIN32
117#define __STDC_FORMAT_MACROS
118#include <inttypes.h>
112#include <stdint.h> 119#include <stdint.h>
113#else /* WIN32 case */ 120#else /* WIN32 case */
114#define WIN32_LEAN_AND_MEAN 121#define WIN32_LEAN_AND_MEAN
115 122
116#ifndef WINVER 123#ifndef WINVER
117#if XD3_USE_LARGEFILE64 124#if XD3_USE_LARGEFILE64
118/* 64 bit file offsets: uses GetFileSizeEx and SetFilePointerEx. 125/* 64 bit file offsets: uses GetFileSizeEx and SetFilePointerEx. */
119 * requires Win2000 or newer version of WinNT */
120#define WINVER 0x0500 126#define WINVER 0x0500
121#define _WIN32_WINNT 0x0500 127#define _WIN32_WINNT 0x0500
122#else /* xoff_t is 32bit */ 128#else /* xoff_t is 32bit */
123/* 32 bit (DWORD) file offsets: uses GetFileSize and 129/* 32 bit file offsets: uses GetFileSize and SetFilePointer. */
124 * SetFilePointer. compatible with win9x-me and WinNT4 */
125#define WINVER 0x0400 130#define WINVER 0x0400
126#define _WIN32_WINNT 0x0400 131#define _WIN32_WINNT 0x0400
127#endif /* if XD3_USE_LARGEFILE64 */ 132#endif /* if XD3_USE_LARGEFILE64 */
@@ -129,9 +134,8 @@
129 134
130#include <windows.h> 135#include <windows.h>
131 136
132/* _MSV_VER is defined by Microsoft tools, not by mingw32 */ 137/* _MSV_VER is defined by Microsoft tools, not by Mingw32 */
133#ifdef _MSC_VER 138#ifdef _MSC_VER
134/*#define inline*/
135typedef signed int ssize_t; 139typedef signed int ssize_t;
136#if _MSC_VER < 1600 140#if _MSC_VER < 1600
137typedef unsigned char uint8_t; 141typedef unsigned char uint8_t;
@@ -143,16 +147,16 @@ typedef ULONGLONG uint64_t;
143#include <stdint.h> 147#include <stdint.h>
144#endif /* _MSC_VER < 1600 */ 148#endif /* _MSC_VER < 1600 */
145#else /* _MSC_VER not defined */ 149#else /* _MSC_VER not defined */
146/* mingw32, lcc and watcom provide a proper header */ 150/* Mingw32 */
147#include <stdint.h> 151#include <stdint.h>
148#endif /* _MSC_VER defined */ 152#endif /* _MSC_VER defined */
149#endif /* _WIN32 defined */
150 153
151typedef uint32_t usize_t; 154#endif /* _WIN32 defined */
152 155
156/* Settings based on the size of xoff_t (32 vs 64 file offsets) */
153#if XD3_USE_LARGEFILE64 157#if XD3_USE_LARGEFILE64
154/* xoff_t is a 64-bit type */ 158/* xoff_t is a 64-bit type */
155#define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops, ... ? */ 159#define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops. */
156 160
157#ifndef _LARGEFILE_SOURCE 161#ifndef _LARGEFILE_SOURCE
158#define _LARGEFILE_SOURCE 162#define _LARGEFILE_SOURCE
@@ -162,6 +166,7 @@ typedef uint32_t usize_t;
162#define _FILE_OFFSET_BITS 64 166#define _FILE_OFFSET_BITS 64
163#endif 167#endif
164 168
169/* Set a xoff_t typedef and the "Q" printf insert. */
165#if defined(_WIN32) 170#if defined(_WIN32)
166typedef uint64_t xoff_t; 171typedef uint64_t xoff_t;
167/* Note: The following generates benign warnings in a mingw 172/* Note: The following generates benign warnings in a mingw
@@ -176,7 +181,7 @@ typedef size_t xoff_t;
176#elif SIZEOF_UNSIGNED_LONG_LONG == 8 181#elif SIZEOF_UNSIGNED_LONG_LONG == 8
177typedef unsigned long long xoff_t; 182typedef unsigned long long xoff_t;
178#define Q "ll" 183#define Q "ll"
179#endif /* #define Q */ 184#endif /* typedef and #define Q */
180 185
181#define SIZEOF_XOFF_T 8 186#define SIZEOF_XOFF_T 8
182 187
@@ -194,9 +199,45 @@ typedef uint32_t xoff_t;
194#define Q 199#define Q
195#endif /* 64 vs 32 bit xoff_t */ 200#endif /* 64 vs 32 bit xoff_t */
196 201
197/* Note: This gets modified in the 64bithash branch. */ 202/* Settings based on the size of usize_t (32 and 64 bit window size) */
203#if XD3_USE_LARGESIZET
204
205/* Set a usize_t typedef and the "W" printf insert. */
206#if defined(_WIN32)
207typedef uint64_t usize_t;
208/* Note: The following generates benign warnings in a mingw
209 * cross-compiler */
210#define W "I64"
211#elif SIZEOF_UNSIGNED_LONG == 8
212typedef unsigned long usize_t;
213#define W "l"
214#elif SIZEOF_SIZE_T == 8
215typedef size_t usize_t;
216#define W "z"
217#elif SIZEOF_UNSIGNED_LONG_LONG == 8
218typedef unsigned long long usize_t;
219#define W "ll"
220#endif /* typedef and #define W */
221
222#define SIZEOF_USIZE_T 8
223
224#else /* XD3_USE_LARGESIZET == 0 */
225
226#if SIZEOF_UNSIGNED_INT == 4
227typedef unsigned int usize_t;
228#elif SIZEOF_UNSIGNED_LONG == 4
229typedef unsigned long usize_t;
230#else
231typedef uint32_t usize_t;
232#endif /* usize_t is 32 bits */
233
198#define SIZEOF_USIZE_T 4 234#define SIZEOF_USIZE_T 4
235#define W
199 236
237#endif /* 64 vs 32 bit usize_t */
238
239/* Settings based on the size of size_t (the system-provided,
240 * usually-but-maybe-not an unsigned type) */
200#if SIZEOF_SIZE_T == 4 241#if SIZEOF_SIZE_T == 4
201#define Z "z" 242#define Z "z"
202#elif SIZEOF_SIZE_T == 8 243#elif SIZEOF_SIZE_T == 8
@@ -339,9 +380,12 @@ typedef int (xd3_comp_table_func) (xd3_stream *stream,
339 380
340 381
341#if XD3_DEBUG 382#if XD3_DEBUG
342#define XD3_ASSERT(x) \ 383#define XD3_ASSERT(x) \
343 do { if (! (x)) { DP(RINT "%s:%d: XD3 assertion failed: %s\n", __FILE__, __LINE__, #x); \ 384 do { \
344 abort (); } } while (0) 385 if (! (x)) { \
386 DP(RINT "%s:%d: XD3 assertion failed: %s\n", \
387 __FILE__, __LINE__, #x); \
388 abort (); } } while (0)
345#else 389#else
346#define XD3_ASSERT(x) (void)0 390#define XD3_ASSERT(x) (void)0
347#endif /* XD3_DEBUG */ 391#endif /* XD3_DEBUG */
@@ -589,9 +633,9 @@ struct _xd3_dinst
589/* the decoded form of a single (half) instruction. */ 633/* the decoded form of a single (half) instruction. */
590struct _xd3_hinst 634struct _xd3_hinst
591{ 635{
592 uint8_t type; 636 uint8_t type;
593 uint32_t size; /* TODO: why decode breaks if this is usize_t? */ 637 usize_t size;
594 uint32_t addr; /* TODO: why decode breaks if this is usize_t? */ 638 usize_t addr;
595}; 639};
596 640
597/* the form of a whole-file instruction */ 641/* the form of a whole-file instruction */
@@ -618,7 +662,7 @@ struct _xd3_desect
618{ 662{
619 const uint8_t *buf; 663 const uint8_t *buf;
620 const uint8_t *buf_max; 664 const uint8_t *buf_max;
621 uint32_t size; /* TODO: why decode breaks if this is usize_t? */ 665 usize_t size;
622 usize_t pos; 666 usize_t pos;
623 667
624 /* used in xdelta3-decode.h */ 668 /* used in xdelta3-decode.h */
@@ -666,9 +710,13 @@ struct _xd3_smatcher
666/* hash table size & power-of-two hash function. */ 710/* hash table size & power-of-two hash function. */
667struct _xd3_hash_cfg 711struct _xd3_hash_cfg
668{ 712{
669 usize_t size; 713 usize_t size; // Number of buckets
670 usize_t shift; 714 usize_t shift;
671 usize_t mask; 715 usize_t mask;
716 usize_t look; // How wide is this checksum
717 usize_t multiplier; // K * powers[0]
718 usize_t *powers; // Array of [0,look) where powers[look-1] == 1
719 // and powers[N] = powers[N+1]*K (Rabin-Karp)
672}; 720};
673 721
674/* the sprev list */ 722/* the sprev list */
@@ -728,7 +776,7 @@ struct _xd3_config
728 xd3_alloc_func *alloc; 776 xd3_alloc_func *alloc;
729 xd3_free_func *freef; 777 xd3_free_func *freef;
730 void *opaque; /* Not used. */ 778 void *opaque; /* Not used. */
731 int flags; /* stream->flags are initialized 779 uint32_t flags; /* stream->flags are initialized
732 * from xd3_config & never 780 * from xd3_config & never
733 * modified by the library. Use 781 * modified by the library. Use
734 * xd3_set_flags to modify flags 782 * xd3_set_flags to modify flags
@@ -771,8 +819,8 @@ struct _xd3_source
771 usize_t srclen; /* length of this source window */ 819 usize_t srclen; /* length of this source window */
772 xoff_t srcbase; /* offset of this source window 820 xoff_t srcbase; /* offset of this source window
773 in the source itself */ 821 in the source itself */
774 int shiftby; /* for power-of-two blocksizes */ 822 usize_t shiftby; /* for power-of-two blocksizes */
775 int maskby; /* for power-of-two blocksizes */ 823 usize_t maskby; /* for power-of-two blocksizes */
776 xoff_t cpyoff_blocks; /* offset of dec_cpyoff in blocks */ 824 xoff_t cpyoff_blocks; /* offset of dec_cpyoff in blocks */
777 usize_t cpyoff_blkoff; /* offset of copy window in 825 usize_t cpyoff_blkoff; /* offset of copy window in
778 blocks, remainder */ 826 blocks, remainder */
@@ -829,7 +877,7 @@ struct _xd3_stream
829 xd3_free_func *free; /* free function */ 877 xd3_free_func *free; /* free function */
830 void* opaque; /* private data object passed to 878 void* opaque; /* private data object passed to
831 alloc, free, and getblk */ 879 alloc, free, and getblk */
832 int flags; /* various options */ 880 uint32_t flags; /* various options */
833 881
834 /* secondary compressor configuration */ 882 /* secondary compressor configuration */
835 xd3_sec_cfg sec_data; /* Secondary compressor config: data */ 883 xd3_sec_cfg sec_data; /* Secondary compressor config: data */
@@ -927,13 +975,11 @@ struct _xd3_stream
927 975
928 usize_t dec_secondid; /* Optional secondary compressor ID. */ 976 usize_t dec_secondid; /* Optional secondary compressor ID. */
929 977
930 /* TODO: why decode breaks if this is usize_t? */ 978 usize_t dec_codetblsz; /* Optional code table: length. */
931 uint32_t dec_codetblsz; /* Optional code table: length. */
932 uint8_t *dec_codetbl; /* Optional code table: storage. */ 979 uint8_t *dec_codetbl; /* Optional code table: storage. */
933 usize_t dec_codetblbytes; /* Optional code table: position. */ 980 usize_t dec_codetblbytes; /* Optional code table: position. */
934 981
935 /* TODO: why decode breaks if this is usize_t? */ 982 usize_t dec_appheadsz; /* Optional application header:
936 uint32_t dec_appheadsz; /* Optional application header:
937 size. */ 983 size. */
938 uint8_t *dec_appheader; /* Optional application header: 984 uint8_t *dec_appheader; /* Optional application header:
939 storage */ 985 storage */
@@ -944,15 +990,12 @@ struct _xd3_stream
944 uint8_t dec_cksum[4]; /* Optional checksum: storage. */ 990 uint8_t dec_cksum[4]; /* Optional checksum: storage. */
945 uint32_t dec_adler32; /* Optional checksum: value. */ 991 uint32_t dec_adler32; /* Optional checksum: value. */
946 992
947 /* TODO: why decode breaks if this is usize_t? */ 993 usize_t dec_cpylen; /* length of copy window
948 uint32_t dec_cpylen; /* length of copy window
949 (VCD_SOURCE or VCD_TARGET) */ 994 (VCD_SOURCE or VCD_TARGET) */
950 xoff_t dec_cpyoff; /* offset of copy window 995 xoff_t dec_cpyoff; /* offset of copy window
951 (VCD_SOURCE or VCD_TARGET) */ 996 (VCD_SOURCE or VCD_TARGET) */
952 /* TODO: why decode breaks if this is usize_t? */ 997 usize_t dec_enclen; /* length of delta encoding */
953 uint32_t dec_enclen; /* length of delta encoding */ 998 usize_t dec_tgtlen; /* length of target window */
954 /* TODO: why decode breaks if this is usize_t? */
955 uint32_t dec_tgtlen; /* length of target window */
956 999
957#if USE_UINT64 1000#if USE_UINT64
958 uint64_t dec_64part; /* part of a decoded uint64_t */ 1001 uint64_t dec_64part; /* part of a decoded uint64_t */
@@ -1258,7 +1301,7 @@ const char* xd3_strerror (int ret);
1258 specified flags. */ 1301 specified flags. */
1259static inline 1302static inline
1260void xd3_init_config (xd3_config *config, 1303void xd3_init_config (xd3_config *config,
1261 int flags) 1304 uint32_t flags)
1262{ 1305{
1263 memset (config, 0, sizeof (*config)); 1306 memset (config, 0, sizeof (*config));
1264 config->flags = flags; 1307 config->flags = flags;
@@ -1321,7 +1364,7 @@ usize_t xd3_encoder_srclen (xd3_stream *stream) {
1321 1364
1322/* Checks for legal flag changes. */ 1365/* Checks for legal flag changes. */
1323static inline 1366static inline
1324void xd3_set_flags (xd3_stream *stream, int flags) 1367void xd3_set_flags (xd3_stream *stream, uint32_t flags)
1325{ 1368{
1326 /* The bitwise difference should contain only XD3_FLUSH or 1369 /* The bitwise difference should contain only XD3_FLUSH or
1327 XD3_SKIP_WINDOW */ 1370 XD3_SKIP_WINDOW */
@@ -1346,8 +1389,8 @@ void xd3_blksize_div (const xoff_t offset,
1346 const xd3_source *source, 1389 const xd3_source *source,
1347 xoff_t *blkno, 1390 xoff_t *blkno,
1348 usize_t *blkoff) { 1391 usize_t *blkoff) {
1349 *blkno = (xoff_t) (offset >> source->shiftby); 1392 *blkno = offset >> source->shiftby;
1350 *blkoff = (usize_t) (offset & source->maskby); 1393 *blkoff = offset & source->maskby;
1351 XD3_ASSERT (*blkoff < source->blksize); 1394 XD3_ASSERT (*blkoff < source->blksize);
1352} 1395}
1353 1396