diff options
author | Joe Crayne <joe@jerkface.net> | 2018-10-16 19:00:37 -0400 |
---|---|---|
committer | Joe Crayne <joe@jerkface.net> | 2018-10-16 19:00:37 -0400 |
commit | 3566b39b184945d3ff13fd69d7db91cffc0b5853 (patch) | |
tree | 55ecd57edeed2ce70e2aa2188621848e8633bfed | |
parent | 7508fd2a823443b1f0173ca361620f21d62a7d37 (diff) |
haskell bindings
-rw-r--r-- | haskell/XDelta.hsc | 566 | ||||
-rw-r--r-- | haskell/Xdelta3.hsc | 962 | ||||
-rw-r--r-- | xdelta3.cabal | 36 |
3 files changed, 1564 insertions, 0 deletions
diff --git a/haskell/XDelta.hsc b/haskell/XDelta.hsc new file mode 100644 index 0000000..96e373a --- /dev/null +++ b/haskell/XDelta.hsc | |||
@@ -0,0 +1,566 @@ | |||
1 | {-# LANGUAGE BangPatterns #-} | ||
2 | {-# LANGUAGE GADTs #-} | ||
3 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} | ||
4 | {-# LANGUAGE LambdaCase #-} | ||
5 | {-# LANGUAGE PatternSynonyms #-} | ||
6 | module XDelta where | ||
7 | |||
8 | import Control.Exception | ||
9 | import Control.Monad | ||
10 | import Data.Bits | ||
11 | import qualified Data.ByteString as B | ||
12 | import qualified Data.ByteString.Internal as B | ||
13 | import Data.Function | ||
14 | import Data.Monoid | ||
15 | import qualified Data.Text as T | ||
16 | import Data.Text.Encoding | ||
17 | import Data.Word | ||
18 | import Foreign.C.Types | ||
19 | import Foreign.C.String | ||
20 | import Foreign.ForeignPtr | ||
21 | import Foreign.Marshal.Alloc | ||
22 | import Foreign.Marshal.Utils | ||
23 | import Foreign.Ptr | ||
24 | import Foreign.Storable | ||
25 | import System.IO | ||
26 | import System.IO.Error | ||
27 | import System.IO.Unsafe | ||
28 | |||
29 | #ifndef SIZEOF_SIZE_T | ||
30 | #define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ | ||
31 | #define SIZEOF_UNSIGNED_INT __SIZEOF_INT__ | ||
32 | #define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__ | ||
33 | #define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__ | ||
34 | #define static_assert(...) | ||
35 | #endif | ||
36 | #include <xdelta3.h> | ||
37 | |||
38 | type Usize_t = #type usize_t | ||
39 | -- | Printf code for type Usize_t | ||
40 | pattern W :: String | ||
41 | pattern W = #const_str W "" | ||
42 | |||
43 | type Xoff_t = #type xoff_t | ||
44 | -- | Printf code for type Xoff_t | ||
45 | pattern Q :: String | ||
46 | pattern Q = #const_str Q "" | ||
47 | |||
48 | |||
49 | -- | These are the five ordinary status codes returned by the | ||
50 | -- xd3_encode_input() and xd3_decode_input() state machines. | ||
51 | -- | ||
52 | -- An application must be prepared to handle these five return | ||
53 | -- values from either xd3_encode_input or xd3_decode_input except | ||
54 | -- in the case of no-source compression in which case XD3_GETSRCBLK | ||
55 | -- is never returned. More detailed comments for these are given in | ||
56 | -- xd3_encode_input and xd3_decode_input comments below. | ||
57 | newtype ErrorCode = ErrorCode CInt | ||
58 | deriving Show | ||
59 | |||
60 | pattern XD3_SUCCESS = ErrorCode 0 | ||
61 | |||
62 | -- | need input | ||
63 | pattern XD3_INPUT = ErrorCode (#const XD3_INPUT) | ||
64 | |||
65 | -- | have output | ||
66 | pattern XD3_OUTPUT = ErrorCode (#const XD3_OUTPUT) | ||
67 | |||
68 | -- | need a block of source input (with no xd3_getblk function) a chance to do non-blocking read. | ||
69 | pattern XD3_GETSRCBLK = ErrorCode (#const XD3_GETSRCBLK) | ||
70 | |||
71 | -- | (decode-only) after the initial VCDIFF & first window header | ||
72 | pattern XD3_GOTHEADER = ErrorCode (#const XD3_GOTHEADER) | ||
73 | |||
74 | -- | notification: returned before a window is processed giving a chance to XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that window. | ||
75 | pattern XD3_WINSTART = ErrorCode (#const XD3_WINSTART) | ||
76 | |||
77 | -- | notification: returned after encode/decode & output for a window | ||
78 | pattern XD3_WINFINISH = ErrorCode (#const XD3_WINFINISH) | ||
79 | |||
80 | -- | (encoder only) may be returned by getblk() if the block is too old | ||
81 | pattern XD3_TOOFARBACK = ErrorCode (#const XD3_TOOFARBACK) | ||
82 | |||
83 | -- | internal error | ||
84 | pattern XD3_INTERNAL = ErrorCode (#const XD3_INTERNAL) | ||
85 | |||
86 | -- | invalid config | ||
87 | pattern XD3_INVALID = ErrorCode (#const XD3_INVALID) | ||
88 | |||
89 | -- | invalid input/decoder error | ||
90 | pattern XD3_INVALID_INPUT = ErrorCode (#const XD3_INVALID_INPUT) | ||
91 | |||
92 | -- | when secondary compression finds no improvement. | ||
93 | pattern XD3_NOSECOND = ErrorCode (#const XD3_NOSECOND) | ||
94 | |||
95 | -- | currently VCD_TARGET VCD_CODETABLE | ||
96 | pattern XD3_UNIMPLEMENTED = ErrorCode (#const XD3_UNIMPLEMENTED) | ||
97 | |||
98 | instance Exception ErrorCode | ||
99 | |||
100 | newtype Stream = Stream (ForeignPtr Stream) | ||
101 | |||
102 | -- | Settings for the secondary compressor. | ||
103 | data CompressorConfig = CompressorConfig | ||
104 | { ngroups :: Usize_t -- ^ Number of DJW Huffman groups. | ||
105 | , sector_size :: Usize_t -- ^ Sector size. | ||
106 | , inefficient :: Int -- ^ If true, ignore efficiency check [avoid XD3_NOSECOND]. | ||
107 | } | ||
108 | |||
109 | -- | The values of this enumeration are set in xd3_config using the | ||
110 | -- 'smatch_cfg' variable. It can be set to default, slow, fast, etc., | ||
111 | -- and soft. | ||
112 | data SMatchSelect | ||
113 | = SMATCH_DEFAULT -- ^ Flags may contain XD3_COMPLEVEL bits, else default. | ||
114 | | SMATCH_SLOW | ||
115 | | SMATCH_FAST | ||
116 | | SMATCH_FASTER | ||
117 | | SMATCH_FASTEST | ||
118 | deriving Enum | ||
119 | |||
120 | |||
121 | matcher :: SMatchSelect -> StringMatcher | ||
122 | matcher select = unsafePerformIO $ do | ||
123 | let config = (init_config (mempty :: Flags)) { winsize = 40, smatch_cfg = Right select } | ||
124 | Stream fptr <- throwXD $ config_stream config | ||
125 | m <- withForeignPtr fptr $ \stream -> do | ||
126 | let smatcher = (#ptr xd3_stream, smatcher) stream | ||
127 | nmptr = (#ptr xd3_smatcher, name) smatcher | ||
128 | nm <- peekCString nmptr | ||
129 | StringMatcher nm | ||
130 | <$> (#peek xd3_smatcher, string_match) smatcher | ||
131 | <*> (#peek xd3_smatcher, large_look) smatcher | ||
132 | <*> (#peek xd3_smatcher, large_step) smatcher | ||
133 | <*> (#peek xd3_smatcher, small_look) smatcher | ||
134 | <*> (#peek xd3_smatcher, small_chain) smatcher | ||
135 | <*> (#peek xd3_smatcher, small_lchain) smatcher | ||
136 | <*> (#peek xd3_smatcher, max_lazy) smatcher | ||
137 | <*> (#peek xd3_smatcher, long_enough) smatcher | ||
138 | finalizeForeignPtr fptr | ||
139 | return m | ||
140 | |||
141 | -- | Default configured value of stream->winsize. If the program | ||
142 | -- supplies xd3_encode_input() with data smaller than winsize the | ||
143 | -- stream will automatically buffer the input, otherwise the input | ||
144 | -- buffer is used directly. | ||
145 | pattern XD3_DEFAULT_WINSIZE = #const XD3_DEFAULT_WINSIZE | ||
146 | -- 8 MiB | ||
147 | |||
148 | data Config = Config | ||
149 | { winsize :: Usize_t -- ^ The encoder window size. | ||
150 | -- The encoder allocates a buffer of this size if the | ||
151 | -- program supplies input in smaller units (unless the | ||
152 | -- XD3_FLUSH flag is set). | ||
153 | , sprevsz :: Usize_t -- ^ How far back small string matching goes | ||
154 | , iopt_size :: Usize_t -- ^ entries in the instruction-optimizing buffer | ||
155 | , flags :: Flags -- ^ stream->flags are initialized from xd3_config & never modified by the library. Use xd3_set_flags to modify flags settings mid-stream. | ||
156 | , sec_data :: CompressorConfig -- ^ Secondary compressor config: data | ||
157 | , sec_inst :: CompressorConfig -- ^ Secondary compressor config: inst | ||
158 | , sec_addr :: CompressorConfig -- ^ Secondary compressor config: addr | ||
159 | , smatch_cfg :: Either StringMatcher SMatchSelect -- ^ See enum: use fields below for soft config | ||
160 | } | ||
161 | |||
162 | -- | This is the record of a pre-compiled configuration, a subset of | ||
163 | -- xd3_config. (struct _xd3_smatcher) | ||
164 | data StringMatcher = StringMatcher | ||
165 | { smName :: String | ||
166 | , smStringMatch :: FunPtr (Ptr Stream -> ErrorCode) | ||
167 | , smLargeLook :: Usize_t | ||
168 | , smLargeStep :: Usize_t | ||
169 | , smSmallLook :: Usize_t | ||
170 | , smSmallChain :: Usize_t | ||
171 | , smSmallLchain :: Usize_t | ||
172 | , smMaxLazy :: Usize_t | ||
173 | , smLongEnough :: Usize_t | ||
174 | } | ||
175 | |||
176 | instance Storable Config where | ||
177 | sizeOf _ = #const sizeof(xd3_config) | ||
178 | alignment _ = 1 | ||
179 | poke p cfg = do | ||
180 | (#poke xd3_config, winsize) p $ winsize cfg | ||
181 | (#poke xd3_config, flags) p $ flags cfg | ||
182 | peek p = do | ||
183 | winsize <- (#peek xd3_config, winsize) p | ||
184 | flags <- (#peek xd3_config, flags) p | ||
185 | return Config | ||
186 | { winsize = winsize | ||
187 | , flags = flags | ||
188 | } | ||
189 | |||
190 | newtype Flags = Flags Word32 | ||
191 | deriving (Storable,Eq,Bits,FiniteBits) | ||
192 | |||
193 | -- used by VCDIFF tools, see xdelta3-main.h.--/ | ||
194 | pattern XD3_JUST_HDR = Flags (#const XD3_JUST_HDR) | ||
195 | -- used by VCDIFF tools see xdelta3-main.h.--/ | ||
196 | pattern XD3_SKIP_WINDOW = Flags (#const XD3_SKIP_WINDOW) | ||
197 | -- | used by VCDIFF tools, see xdelta3-main.h. */ | ||
198 | pattern XD3_SKIP_EMIT = Flags (#const XD3_SKIP_EMIT) | ||
199 | -- | flush the stream buffer to prepare for xd3_stream_close(). */ | ||
200 | pattern XD3_FLUSH = Flags (#const XD3_FLUSH) | ||
201 | -- | use DJW static huffman */ | ||
202 | pattern XD3_SEC_DJW = Flags (#const XD3_SEC_DJW) | ||
203 | -- | use FGK adaptive huffman */ | ||
204 | pattern XD3_SEC_FGK = Flags (#const XD3_SEC_FGK) | ||
205 | -- | use LZMA secondary */ | ||
206 | pattern XD3_SEC_LZMA = Flags (#const XD3_SEC_LZMA) | ||
207 | pattern XD3_SEC_TYPE = Flags (#const XD3_SEC_TYPE) | ||
208 | -- | disable secondary compression of the data section. */ | ||
209 | pattern XD3_SEC_NODATA = Flags (#const XD3_SEC_NODATA) | ||
210 | -- | disable secondary compression of the inst section. */ | ||
211 | pattern XD3_SEC_NOINST = Flags (#const XD3_SEC_NOINST) | ||
212 | -- | disable secondary compression of the addr section. */ | ||
213 | pattern XD3_SEC_NOADDR = Flags (#const XD3_SEC_NOADDR) | ||
214 | pattern XD3_SEC_NOALL = Flags (#const XD3_SEC_NOALL) | ||
215 | -- | enable checksum computation in the encoder. */ | ||
216 | pattern XD3_ADLER32 = Flags (#const XD3_ADLER32) | ||
217 | -- | disable checksum verification in the decoder. */ | ||
218 | pattern XD3_ADLER32_NOVER = Flags (#const XD3_ADLER32_NOVER) | ||
219 | -- | disable ordinary data * compression feature, only search * the source, not the target. */ | ||
220 | pattern XD3_NOCOMPRESS = Flags (#const XD3_NOCOMPRESS) | ||
221 | -- | disable the "1.5-pass * algorithm", instead use greedy * matching. Greedy is off by * default. */ | ||
222 | pattern XD3_BEGREEDY = Flags (#const XD3_BEGREEDY) | ||
223 | -- | used by "recode". */ | ||
224 | pattern XD3_ADLER32_RECODE = Flags (#const XD3_ADLER32_RECODE) | ||
225 | -- 4 bits to set the compression level the same as the command-line | ||
226 | -- setting -1 through -9 Flags (-0 corresponds to the XD3_NOCOMPRESS flag | ||
227 | -- and is independent of compression level). This is for | ||
228 | -- convenience especially with xd3_encode_memoryFlags (). */ | ||
229 | pattern XD3_COMPLEVEL_SHIFT = #const XD3_COMPLEVEL_SHIFT | ||
230 | pattern XD3_COMPLEVEL_MASK = Flags (#const XD3_COMPLEVEL_MASK) | ||
231 | pattern XD3_COMPLEVEL_1 = Flags (#const XD3_COMPLEVEL_1) | ||
232 | pattern XD3_COMPLEVEL_2 = Flags (#const XD3_COMPLEVEL_2) | ||
233 | pattern XD3_COMPLEVEL_3 = Flags (#const XD3_COMPLEVEL_3) | ||
234 | pattern XD3_COMPLEVEL_6 = Flags (#const XD3_COMPLEVEL_6) | ||
235 | pattern XD3_COMPLEVEL_9 = Flags (#const XD3_COMPLEVEL_9) | ||
236 | |||
237 | instance Monoid Flags where | ||
238 | mempty = Flags 0 | ||
239 | Flags a `mappend` Flags b = Flags (a .|. b) | ||
240 | |||
241 | type CGetBlk = Ptr Stream -> Ptr Xd3_source -> Xoff_t -> IO CInt | ||
242 | |||
243 | foreign import ccall "wrapper" | ||
244 | wrapGetBlk :: CGetBlk -> IO (FunPtr CGetBlk) | ||
245 | |||
246 | foreign import ccall "wrapper" | ||
247 | wrapFinalizer :: (Ptr Stream -> IO ()) -> IO (FunPtr (Ptr Stream -> IO ())) | ||
248 | |||
249 | -- | For convenience, zero & initialize the xd3_config structure with specified | ||
250 | -- flags. | ||
251 | init_config :: Flags -> Config | ||
252 | init_config flags = Config | ||
253 | { winsize = 0 | ||
254 | , flags = flags | ||
255 | } | ||
256 | |||
257 | foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Stream -> Ptr xd3_config -> IO ErrorCode | ||
258 | |||
259 | -- | xd3_free_stream frees all memory allocated for the stream. The | ||
260 | -- application is responsible for freeing any of the resources it | ||
261 | -- supplied. | ||
262 | foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Stream -> IO () | ||
263 | |||
264 | |||
265 | -- The xd3_config structure is used to initialize a stream - all data | ||
266 | -- is copied into stream so config may be a temporary variable. See | ||
267 | -- the [documentation] or comments on the xd3_config structure. | ||
268 | config_stream :: Config -> IO (Either ErrorCode Stream) | ||
269 | config_stream cfg = do | ||
270 | pstream <- callocBytes (#const sizeof(xd3_stream)) | ||
271 | fptr <- newForeignPtr finalizerFree pstream | ||
272 | wrapFinalizer xd3_free_stream >>= (`addForeignPtrFinalizer` fptr) | ||
273 | wrapFinalizer unset_source >>= (`addForeignPtrFinalizer` fptr) | ||
274 | wrapFinalizer unset_header >>= (`addForeignPtrFinalizer` fptr) | ||
275 | with cfg $ \pcfg -> do | ||
276 | code <- xd3_config_stream pstream pcfg | ||
277 | case code of | ||
278 | ErrorCode 0 -> return . Right $ Stream fptr | ||
279 | _ -> do free pstream | ||
280 | return . Left $ code | ||
281 | |||
282 | throwXD :: IO (Either ErrorCode a) -> IO a | ||
283 | throwXD action = action >>= either throwIO return | ||
284 | |||
285 | example_setup :: IO Stream | ||
286 | example_setup = do | ||
287 | let config = (init_config (mempty :: Flags)) { winsize = 32768 } | ||
288 | stream <- throwXD $ config_stream config | ||
289 | return stream | ||
290 | |||
291 | |||
292 | newtype Xd3_source = Xd3_source (Ptr Xd3_source) | ||
293 | |||
294 | data CurrentBlock = CurrentBlock | ||
295 | { blkno :: !Xoff_t -- ^ current block number | ||
296 | , blkSize :: !Usize_t -- ^ number of bytes on current block: must be >= 0 and <= 'srcBlockSize' | ||
297 | , blkPtr :: !(Ptr Word8) -- ^ current block array | ||
298 | } | ||
299 | |||
300 | -- XD3_TOOFARBACK block is too old | ||
301 | -- XD3_INVALID_INPUT | ||
302 | type GetBlock = CurrentBlock -> Xoff_t -> IO (Either ErrorCode CurrentBlock) | ||
303 | |||
304 | data Source = Source | ||
305 | { srcName :: String -- ^ name for debug/print purposes | ||
306 | , srcBlockSize :: Usize_t -- ^ block size | ||
307 | , srcMaxWinSize :: Xoff_t -- ^ maximum visible buffer | ||
308 | , srcGetBlock :: Maybe GetBlock | ||
309 | } | ||
310 | |||
311 | foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Stream -> Ptr Xd3_source -> IO ErrorCode | ||
312 | |||
313 | requestedBlockNumber :: Stream -> IO Xoff_t | ||
314 | requestedBlockNumber (Stream fptr) = withForeignPtr fptr $ \stream -> do | ||
315 | psrc <- (#peek xd3_stream, src) stream | ||
316 | (#peek xd3_source, getblkno) psrc | ||
317 | |||
318 | peekCurrentBlock :: Stream -> IO CurrentBlock | ||
319 | peekCurrentBlock (Stream fptr) = withForeignPtr fptr $ \stream -> do | ||
320 | psrc <- (#peek xd3_stream, src) stream | ||
321 | CurrentBlock <$> (#peek xd3_source, curblkno) psrc | ||
322 | <*> (#peek xd3_source, onblk) psrc | ||
323 | <*> (#peek xd3_source, curblk) psrc | ||
324 | |||
325 | pokeCurrentBlock :: Stream -> CurrentBlock -> IO () | ||
326 | pokeCurrentBlock (Stream fptr) (CurrentBlock no sz ptr) = withForeignPtr fptr $ \stream -> do | ||
327 | psrc <- (#peek xd3_stream, src) stream | ||
328 | (#poke xd3_source, curblkno) psrc no | ||
329 | (#poke xd3_source, onblk ) psrc sz | ||
330 | (#poke xd3_source, curblk ) psrc ptr | ||
331 | |||
332 | nextOut :: Stream -> ((Ptr Word8, Int) -> IO a) -> IO a | ||
333 | nextOut (Stream fptr) action = withForeignPtr fptr $ \stream -> do | ||
334 | buf <- (,) <$> (#peek xd3_stream, next_out) stream | ||
335 | <*> (#peek xd3_stream, avail_out) stream | ||
336 | a <- action buf | ||
337 | xd3_consume_output stream | ||
338 | return a | ||
339 | |||
340 | unset_source :: Ptr Stream -> IO () | ||
341 | unset_source stream = do | ||
342 | previous_src <- (#peek xd3_stream, src) stream | ||
343 | when (previous_src /= nullPtr) $ free previous_src | ||
344 | (#poke xd3_stream, src) stream nullPtr | ||
345 | |||
346 | unset_header :: Ptr Stream -> IO () | ||
347 | unset_header stream = do | ||
348 | hdr <- (#peek xd3_stream, enc_appheader) stream | ||
349 | when (hdr /= nullPtr) $ free hdr | ||
350 | (#poke xd3_stream, enc_appheader) stream nullPtr | ||
351 | |||
352 | |||
353 | -- | This function informs the encoder or decoder that source matching | ||
354 | -- (i.e., delta-compression) is possible. For encoding, this should | ||
355 | -- be called before the first xd3_encode_input. A NULL source is | ||
356 | -- ignored. For decoding, this should be called before the first | ||
357 | -- window is decoded, but the appheader may be read first | ||
358 | -- (XD3_GOTHEADER). After decoding the header, call xd3_set_source() | ||
359 | -- if you have a source file. Note: if (stream->dec_win_ind & VCD_SOURCE) | ||
360 | -- is true, it means the first window expects there to be a source file. | ||
361 | set_source :: Stream -> Source -> IO ErrorCode | ||
362 | set_source (Stream fptr) src = withForeignPtr fptr $ \stream -> do | ||
363 | unset_source stream | ||
364 | let bsname = encodeUtf8 $ T.pack $ srcName src | ||
365 | psrc <- callocBytes $ 1 + B.length bsname + (#const sizeof(xd3_source)) | ||
366 | let pname = castPtr psrc `plusPtr` (#const sizeof(xd3_source)) | ||
367 | copyname ptr (w:ws) = poke ptr w >> copyname (plusPtr ptr 1) ws | ||
368 | copyname ptr [] = poke ptr (0 :: Word8) | ||
369 | copyname pname (B.unpack bsname) | ||
370 | (#poke xd3_source, blksize ) psrc $ srcBlockSize src | ||
371 | (#poke xd3_source, name ) psrc pname | ||
372 | (#poke xd3_source, max_winsize) psrc $ srcMaxWinSize src | ||
373 | (#poke xd3_source, curblkno ) psrc (maxBound :: Xoff_t) | ||
374 | srcGetBlock src `forM_` \getBlock -> do | ||
375 | cgetblk <- wrapGetBlk $ \stream psrc xoff -> do | ||
376 | curblk <- CurrentBlock | ||
377 | <$> (#peek xd3_source, curblkno) psrc | ||
378 | <*> (#peek xd3_source, onblk) psrc | ||
379 | <*> (#peek xd3_source, curblk) psrc | ||
380 | ret <- getBlock curblk xoff | ||
381 | case ret of | ||
382 | Left (ErrorCode ecode) -> return ecode | ||
383 | Right (CurrentBlock no sz ptr) -> do | ||
384 | (#poke xd3_source, curblkno) psrc no | ||
385 | (#poke xd3_source, onblk ) psrc sz | ||
386 | (#poke xd3_source, curblk ) psrc ptr | ||
387 | return 0 | ||
388 | (#poke xd3_stream, getblk) stream cgetblk -- Warning: xdelta3.h documents this as an internal field. | ||
389 | -- It's possible to comply with the documentation using the | ||
390 | -- 'ioh' field to dispatch, but that would be awkward and | ||
391 | -- inefficient. | ||
392 | xd3_set_source stream psrc | ||
393 | |||
394 | |||
395 | -- | Checks for legal flag changes. | ||
396 | foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Stream -> Flags -> IO () | ||
397 | |||
398 | foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Stream -> Ptr a -> Usize_t -> IO () | ||
399 | foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input :: Ptr Stream -> IO ErrorCode | ||
400 | foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input :: Ptr Stream -> IO ErrorCode | ||
401 | |||
402 | set_flags (Stream fp) f = withForeignPtr fp $ (`xd3_set_flags` f) | ||
403 | avail_input (Stream fp) p sz = withForeignPtr fp (\stream -> xd3_avail_input stream p sz) | ||
404 | decode_input (Stream fp) = withForeignPtr fp xd3_decode_input | ||
405 | encode_input (Stream fp) = withForeignPtr fp xd3_encode_input | ||
406 | |||
407 | -- xd3_get_appheader may be called in the decoder after XD3_GOTHEADER. | ||
408 | -- For convenience, the decoder always adds a single byte padding to | ||
409 | -- the end of the application header, which is set to zero in case the | ||
410 | -- application header is a string. | ||
411 | foreign import ccall "xdelta3.h xd3_get_appheader" xd3_get_appheader :: Ptr Stream -> Ptr (Ptr Word8) -> Ptr Usize_t -> IO ErrorCode | ||
412 | |||
413 | foreign import ccall "xdelta3.h xd3_set_appheader" xd3_set_appheader :: Ptr Stream -> Ptr Word8 -> Usize_t -> IO () | ||
414 | |||
415 | -- output//source/ | ||
416 | appHeader :: Stream -> IO (Maybe String) | ||
417 | appHeader (Stream fptr) = withForeignPtr fptr $ \stream -> | ||
418 | alloca $ \pp -> | ||
419 | alloca $ \psz -> do | ||
420 | xd3_get_appheader stream pp psz >>= \case | ||
421 | XD3_SUCCESS -> peek pp >>= \case | ||
422 | hdr | hdr == nullPtr -> return Nothing | ||
423 | | otherwise -> do sz <- peek psz | ||
424 | Just <$> peekCStringLen (castPtr pp,fromIntegral sz) | ||
425 | _ -> return Nothing | ||
426 | |||
427 | |||
428 | -- | This should be called before the first call to xd3_encode_input() to | ||
429 | -- include application-specific data in the VCDIFF header. | ||
430 | -- | ||
431 | -- Note: This is used for encoding and is not the same field retrieved by | ||
432 | -- 'appHeader'. | ||
433 | setAppHeader :: Stream -> String -> IO () | ||
434 | setAppHeader (Stream fptr) hdr = withForeignPtr fptr $ \stream -> do | ||
435 | withCStringLen hdr $ \(dta,sz) -> do | ||
436 | phdr <- mallocBytes sz | ||
437 | copyBytes phdr (castPtr dta) sz | ||
438 | xd3_set_appheader stream phdr (fromIntegral sz) | ||
439 | |||
440 | |||
441 | sourceFromHandle :: Usize_t -> Ptr Word8 -> Handle -> Source | ||
442 | sourceFromHandle blksize ptr h = Source | ||
443 | { srcName = "sourceFromHandle" | ||
444 | , srcBlockSize = blksize | ||
445 | , srcMaxWinSize = blksize | ||
446 | , srcGetBlock = Just $ \_ num -> do | ||
447 | hSeek h AbsoluteSeek (fromIntegral blksize * fromIntegral num) | ||
448 | cnt <- hGetBuf h ptr (fromIntegral blksize) | ||
449 | return $ Right (CurrentBlock num (fromIntegral cnt) ptr) | ||
450 | `catchIOError` \_ -> return $ Left XD3_TOOFARBACK | ||
451 | } | ||
452 | |||
453 | example_set_source stream = do | ||
454 | h <- openFile "source-file.bin" ReadMode | ||
455 | buf <- mallocBytes 32768 | ||
456 | let source = (sourceFromHandle 32768 buf h) { srcName = "source-file.bin" } | ||
457 | ret <- set_source stream source | ||
458 | return ret | ||
459 | |||
460 | withFileSource :: Config -> FilePath -> Int -> (Stream -> IO a) -> IO a | ||
461 | withFileSource cfg fname blksize action = do | ||
462 | stream <- throwXD $ config_stream cfg | ||
463 | withFile fname ReadMode $ \h -> do | ||
464 | allocaBytes blksize $ \buf -> do | ||
465 | code <- set_source stream (sourceFromHandle (fromIntegral blksize) buf h) { srcName = fname } | ||
466 | case code of | ||
467 | XD3_SUCCESS -> action stream | ||
468 | _ -> throwIO code | ||
469 | |||
470 | |||
471 | example_input_loop stream inp = do | ||
472 | allocaBytes 4096 $ \indata -> fix $ \loop -> do | ||
473 | insize <- hGetBuf inp indata 4096 | ||
474 | let reached_EOF = insize < 4096 | ||
475 | when reached_EOF $ set_flags stream XD3_FLUSH | ||
476 | avail_input stream indata (fromIntegral insize) | ||
477 | fix $ \process -> do | ||
478 | ret <- decode_input stream | ||
479 | case ret of | ||
480 | XD3_INPUT -> when (not reached_EOF) loop | ||
481 | XD3_OUTPUT -> do | ||
482 | -- todo write data | ||
483 | process | ||
484 | XD3_GETSRCBLK -> do | ||
485 | -- todo set source block | ||
486 | process | ||
487 | XD3_GOTHEADER -> process -- No | ||
488 | XD3_WINSTART -> process -- action | ||
489 | XD3_WINFINISH -> process -- neccessary | ||
490 | _ -> throwIO (userError "Unexpected return code from decode_input.") | ||
491 | |||
492 | {- | ||
493 | data BufferEater m where | ||
494 | Lazy :: (ByteString -> m -> m) -> BufferEater m | ||
495 | Strict :: (ByteString -> IO ()) -> BufferEater () | ||
496 | -} | ||
497 | |||
498 | data XDeltaMethods m = XDeltaMethods | ||
499 | { xConfig :: Config | ||
500 | , xGetSource :: Xoff_t -> B.ByteString | ||
501 | , xOutput :: Ptr Word8 -> Int -> IO m | ||
502 | , xOnError :: ErrorCode -> String -> IO m | ||
503 | , xBlockSize :: Usize_t | ||
504 | } | ||
505 | |||
506 | -- | This acknowledges receipt of output data, must be called after any | ||
507 | -- XD3_OUTPUT return. | ||
508 | foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Stream -> IO () | ||
509 | |||
510 | -- -- built in to 'nextOut' | ||
511 | -- acknowledgeOutput :: Stream -> IO () | ||
512 | -- acknowledgeOutput (Stream fptr) = withForeignPtr fptr $ xd3_consume_output | ||
513 | |||
514 | foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Stream -> IO CString | ||
515 | |||
516 | -- | Gives some extra information about the latest library error, if any | ||
517 | -- is known. | ||
518 | errorString (Stream fptr) = withForeignPtr fptr $ \stream -> do | ||
519 | cstring <- xd3_errstring stream | ||
520 | peekCString cstring | ||
521 | |||
522 | |||
523 | xdelta :: Monoid m => XDeltaMethods m -> (Stream -> IO ErrorCode) -> [B.ByteString] -> IO m | ||
524 | xdelta x xxcode_input ds = do | ||
525 | stream <- throwXD $ config_stream (xConfig x) | ||
526 | set_source stream Source | ||
527 | { srcName = "XDeltaMethods" | ||
528 | , srcBlockSize = xBlockSize x | ||
529 | , srcMaxWinSize = xBlockSize x | ||
530 | , srcGetBlock = Nothing | ||
531 | } | ||
532 | let go withBlk (d:ds) = do | ||
533 | let (fp,off,len) = B.toForeignPtr d | ||
534 | withForeignPtr fp $ \indata0 -> do | ||
535 | let indata = indata0 `plusPtr` off | ||
536 | eof = null ds | ||
537 | when eof $ set_flags stream XD3_FLUSH | ||
538 | avail_input stream indata (fromIntegral len) | ||
539 | go2 withBlk eof ds | ||
540 | go2 withBlk eof ds = do | ||
541 | ret <- withBlk $ xxcode_input stream | ||
542 | case ret of | ||
543 | XD3_INPUT -> if (not eof) then go withBlk ds else return mempty | ||
544 | XD3_OUTPUT -> do | ||
545 | m' <- nextOut stream (uncurry $ xOutput x) | ||
546 | ms <- unsafeInterleaveIO $ go2 withBlk eof ds | ||
547 | return $ m' <> ms | ||
548 | XD3_GETSRCBLK -> do | ||
549 | n <- requestedBlockNumber stream | ||
550 | let blk = xGetSource x n | ||
551 | withBlk' act = let (fp,off,len) = B.toForeignPtr blk | ||
552 | in withForeignPtr fp $ \p -> do | ||
553 | pokeCurrentBlock stream $ CurrentBlock n (fromIntegral len) (plusPtr p off) | ||
554 | act | ||
555 | go2 withBlk' eof ds | ||
556 | XD3_GOTHEADER -> go2 withBlk eof ds -- No | ||
557 | XD3_WINSTART -> go2 withBlk eof ds -- action | ||
558 | XD3_WINFINISH -> go2 withBlk eof ds -- neccessary | ||
559 | -- -- These are set for each XD3_WINFINISH return. | ||
560 | -- xd3_encoder_used_source :: Ptr Stream -> IO Bool | ||
561 | -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t | ||
562 | -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t | ||
563 | e -> do | ||
564 | s <- errorString stream | ||
565 | xOnError x e s | ||
566 | go id ds | ||
diff --git a/haskell/Xdelta3.hsc b/haskell/Xdelta3.hsc new file mode 100644 index 0000000..53b8a91 --- /dev/null +++ b/haskell/Xdelta3.hsc | |||
@@ -0,0 +1,962 @@ | |||
1 | {-# LANGUAGE BangPatterns #-} | ||
2 | {-# LANGUAGE PatternSynonyms #-} | ||
3 | module Xdelta3 where | ||
4 | |||
5 | import Data.Word | ||
6 | import Foreign.C.Types | ||
7 | import Foreign.C.String | ||
8 | import Foreign.Marshal.Alloc | ||
9 | import Foreign.Ptr | ||
10 | |||
11 | -- #define HAVE_CONFIG_H 1 | ||
12 | |||
13 | #define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ | ||
14 | #define SIZEOF_UNSIGNED_INT __SIZEOF_INT__ | ||
15 | #define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__ | ||
16 | #define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__ | ||
17 | #define static_assert(...) | ||
18 | |||
19 | #include <xdelta3.h> | ||
20 | |||
21 | |||
22 | -- To learn more about Xdelta, start by reading xdelta3.c. If you are | ||
23 | -- ready to use the API, continue reading here. There are two | ||
24 | -- interfaces -- xd3_encode_input and xd3_decode_input -- plus a dozen | ||
25 | -- or so related calls. This interface is styled after Zlib. | ||
26 | |||
27 | |||
28 | -- | Default configured value of stream->winsize. If the program | ||
29 | -- supplies xd3_encode_input() with data smaller than winsize the | ||
30 | -- stream will automatically buffer the input, otherwise the input | ||
31 | -- buffer is used directly. | ||
32 | pattern XD3_DEFAULT_WINSIZE = #const XD3_DEFAULT_WINSIZE | ||
33 | |||
34 | -- | Default total size of the source window used in xdelta3-main.h | ||
35 | pattern XD3_DEFAULT_SRCWINSZ = #const XD3_DEFAULT_SRCWINSZ | ||
36 | |||
37 | -- When Xdelta requests a memory allocation for certain buffers, it | ||
38 | -- rounds up to units of at least this size. The code assumes (and | ||
39 | -- asserts) that this is a power-of-two. | ||
40 | pattern XD3_ALLOCSIZE = #const XD3_ALLOCSIZE | ||
41 | |||
42 | |||
43 | -- The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect | ||
44 | -- decoders against malicious files. The decoder will never decode a | ||
45 | -- window larger than this. If the file specifies VCD_TARGET the | ||
46 | -- decoder may require two buffers of this size. | ||
47 | -- | ||
48 | -- 8-16MB is reasonable, probably don't need to go larger. | ||
49 | pattern XD3_HARDMAXWINSIZE = #const XD3_HARDMAXWINSIZE | ||
50 | |||
51 | |||
52 | -- The IOPT_SIZE value sets the size of a buffer used to batch | ||
53 | -- overlapping copy instructions before they are optimized by picking | ||
54 | -- the best non-overlapping ranges. The larger this buffer, the | ||
55 | -- longer a forced xd3_srcwin_setup() decision is held off. Setting | ||
56 | -- this value to 0 causes an unlimited buffer to be used. */ | ||
57 | pattern XD3_DEFAULT_IOPT_SIZE = #const XD3_DEFAULT_IOPT_SIZE | ||
58 | |||
59 | -- The maximum distance backward to search for small matches */ | ||
60 | pattern XD3_DEFAULT_SPREVSZ = #const XD3_DEFAULT_SPREVSZ | ||
61 | |||
62 | -- The default compression level */ | ||
63 | pattern XD3_DEFAULT_LEVEL = #const XD3_DEFAULT_LEVEL | ||
64 | |||
65 | pattern XD3_DEFAULT_SECONDARY_LEVEL = #const XD3_DEFAULT_SECONDARY_LEVEL | ||
66 | |||
67 | pattern XD3_USE_LARGEFILE64 = #const XD3_USE_LARGEFILE64 | ||
68 | |||
69 | -- The source window size is limited to 2GB unless | ||
70 | -- XD3_USE_LARGESIZET is defined to 1. */ | ||
71 | pattern XD3_USE_LARGESIZET = #const XD3_USE_LARGESIZET | ||
72 | |||
73 | |||
74 | -- Sizes and addresses within VCDIFF windows are represented as usize_t | ||
75 | -- | ||
76 | -- For source-file offsets and total file sizes, total input and | ||
77 | -- output counts, the xoff_t type is used. The decoder and encoder | ||
78 | -- generally check for overflow of the xoff_t size (this is tested at | ||
79 | -- the 32bit boundary [xdelta3-test.h]). | ||
80 | |||
81 | -- Settings based on the size of xoff_t (32 vs 64 file offsets) */ | ||
82 | #if XD3_USE_LARGEFILE64 | ||
83 | -- xoff_t is a 64-bit type */ | ||
84 | pattern C__USE_FILE_OFFSET64 = #const __USE_FILE_OFFSET64 | ||
85 | |||
86 | pattern C_FILE_OFFSET_BITS = #const _FILE_OFFSET_BITS | ||
87 | #endif | ||
88 | |||
89 | #if SIZEOF_XOFF_T == 8 | ||
90 | type Xoff_t = Word64 | ||
91 | pattern Q = #const_str Q | ||
92 | #elif SIZEOF_XOFF_T == 4 | ||
93 | type Xoff_t = Word32 | ||
94 | pattern Q = "" | ||
95 | #endif | ||
96 | |||
97 | pattern SIZEOF_XOFF_T = #const SIZEOF_XOFF_T | ||
98 | |||
99 | #if SIZEOF_USIZE_T == 8 | ||
100 | type Usize_t = Word64 | ||
101 | pattern W = #const_str W | ||
102 | #elif SIZEOF_USIZE_T == 4 | ||
103 | type Usize_t = Word32 | ||
104 | pattern W = "" | ||
105 | #endif | ||
106 | |||
107 | pattern SIZEOF_USIZE_T = #const SIZEOF_USIZE_T | ||
108 | |||
109 | pattern SIZEOF_SIZE_T = #const SIZEOF_SIZE_T | ||
110 | pattern Z = #const_str Z | ||
111 | |||
112 | pattern USE_UINT32 = #const USE_UINT32 | ||
113 | pattern USE_UINT64 = #const USE_UINT64 | ||
114 | |||
115 | pattern UNALIGNED_OK = #const UNALIGNED_OK | ||
116 | |||
117 | pattern XD3_ENCODER = #const XD3_ENCODER | ||
118 | |||
119 | -- The code returned when main() fails, also defined in system | ||
120 | -- includes. | ||
121 | pattern EXIT_FAILURE = #const EXIT_FAILURE | ||
122 | |||
123 | -- REGRESSION TEST enables the "xdelta3 test" command, which runs a | ||
124 | -- series of self-tests. | ||
125 | pattern REGRESSION_TEST = #const REGRESSION_TEST | ||
126 | |||
127 | pattern PYTHON_MODULE = #const PYTHON_MODULE | ||
128 | |||
129 | pattern SWIG_MODULE = #const SWIG_MODULE | ||
130 | |||
131 | pattern NOT_MAIN = #const NOT_MAIN | ||
132 | |||
133 | -- There are three string matching functions supplied: one fast, one | ||
134 | -- slow (default), and one soft-configurable. To disable any of | ||
135 | -- these, use the following definitions. | ||
136 | pattern XD3_BUILD_SLOW = #const XD3_BUILD_SLOW | ||
137 | pattern XD3_BUILD_FAST = #const XD3_BUILD_FAST | ||
138 | pattern XD3_BUILD_FASTER = #const XD3_BUILD_FASTER | ||
139 | pattern XD3_BUILD_FASTEST = #const XD3_BUILD_FASTEST | ||
140 | pattern XD3_BUILD_SOFT = #const XD3_BUILD_SOFT | ||
141 | pattern XD3_BUILD_DEFAULT = #const XD3_BUILD_DEFAULT | ||
142 | |||
143 | |||
144 | -- newtype Xd3_stream = Xd3_stream ( Ptr Xd3_stream ) | ||
145 | -- newtype Xd3_source = Xd3_source ( Ptr Xd3_source ) | ||
146 | newtype Xd3_hash_cfg = Xd3_hash_cfg ( Ptr Xd3_hash_cfg ) | ||
147 | newtype Xd3_smatcher = Xd3_smatcher ( Ptr Xd3_smatcher ) | ||
148 | newtype Xd3_rinst = Xd3_rinst ( Ptr Xd3_rinst ) | ||
149 | newtype Xd3_dinst = Xd3_dinst ( Ptr Xd3_dinst ) | ||
150 | newtype Xd3_hinst = Xd3_hinst ( Ptr Xd3_hinst ) | ||
151 | newtype Xd3_winst = Xd3_winst ( Ptr Xd3_winst ) | ||
152 | newtype Xd3_rpage = Xd3_rpage ( Ptr Xd3_rpage ) | ||
153 | newtype Xd3_addr_cache = Xd3_addr_cache ( Ptr Xd3_addr_cache ) | ||
154 | newtype Xd3_output = Xd3_output ( Ptr Xd3_output ) | ||
155 | newtype Xd3_desect = Xd3_desect ( Ptr Xd3_desect ) | ||
156 | newtype Xd3_iopt_buflist = Xd3_iopt_buflist ( Ptr Xd3_iopt_buflist ) | ||
157 | newtype Xd3_rlist = Xd3_rlist ( Ptr Xd3_rlist ) | ||
158 | newtype Xd3_sec_type = Xd3_sec_type ( Ptr Xd3_sec_type ) | ||
159 | -- newtype Xd3_sec_cfg = Xd3_sec_cfg ( Ptr Xd3_sec_cfg ) | ||
160 | newtype Xd3_sec_stream = Xd3_sec_stream ( Ptr Xd3_sec_stream ) | ||
161 | -- newtype Xd3_config = Xd3_config ( Ptr Xd3_config ) | ||
162 | newtype Xd3_code_table_desc = Xd3_code_table_desc ( Ptr Xd3_code_table_desc ) | ||
163 | newtype Xd3_code_table_sizes = Xd3_code_table_sizes ( Ptr Xd3_code_table_sizes ) | ||
164 | newtype Xd3_slist = Xd3_slist ( Ptr Xd3_slist ) | ||
165 | newtype Xd3_whole_state = Xd3_whole_state ( Ptr Xd3_whole_state ) | ||
166 | newtype Xd3_wininfo = Xd3_wininfo ( Ptr Xd3_wininfo ) | ||
167 | |||
168 | |||
169 | |||
170 | -- The stream configuration has three callbacks functions, all of | ||
171 | -- which may be supplied with NULL values. If config->getblk is | ||
172 | -- provided as NULL, the stream returns XD3_GETSRCBLK. */ | ||
173 | |||
174 | -- typedef void* (xd3_alloc_func) (void *opaque, | ||
175 | -- size_t items, | ||
176 | -- usize_t size); | ||
177 | foreign export ccall hs_alloc :: Ptr () | ||
178 | -> #type size_t | ||
179 | -> #type usize_t | ||
180 | -> IO ( Ptr ( )) | ||
181 | hs_alloc _ items size = mallocBytes (fromIntegral items * fromIntegral size) | ||
182 | |||
183 | -- typedef void (xd3_free_func) (void *opaque, | ||
184 | -- void *address); | ||
185 | foreign export ccall hs_free :: Ptr () -> Ptr () -> IO () | ||
186 | hs_free _ address = free address | ||
187 | |||
188 | {- | ||
189 | /* third callback */ | ||
190 | typedef int (xd3_getblk_func) (xd3_stream *stream, | ||
191 | xd3_source *source, | ||
192 | xoff_t blkno); | ||
193 | |||
194 | typedef const xd3_dinst* (xd3_code_table_func) (void); | ||
195 | - | ||
196 | -} | ||
197 | |||
198 | -- Type used for short snprintf calls. */ | ||
199 | -- typedef struct { char buf[48]; } shortbuf; | ||
200 | data Shortbuf = Shortbuf {-# UNPACK #-} !Word64 | ||
201 | {-# UNPACK #-} !Word64 | ||
202 | {-# UNPACK #-} !Word64 | ||
203 | {-# UNPACK #-} !Word64 | ||
204 | {-# UNPACK #-} !Word64 | ||
205 | {-# UNPACK #-} !Word64 | ||
206 | |||
207 | |||
208 | |||
209 | -- XPR(NT "", ...) (used by main) prefixes an "xdelta3: " to the output. */ | ||
210 | -- void xprintf(const char *fmt, ...) PRINTF_ATTRIBUTE(1,2); | ||
211 | -- foreign import ccall "xdelta3.h xprintf" xprintf :: ? | ||
212 | |||
213 | pattern NT = #const_str NT | ||
214 | pattern NTR = #const_str NTR | ||
215 | pattern RINT = #const_str RINT | ||
216 | |||
217 | |||
218 | ----------------------------------------------------------------- | ||
219 | -- PUBLIC ENUMS | ||
220 | -------------------------------------------------------------------- | ||
221 | |||
222 | -- | These are the five ordinary status codes returned by the | ||
223 | -- xd3_encode_input() and xd3_decode_input() state machines. | ||
224 | -- | ||
225 | -- An application must be prepared to handle these five return | ||
226 | -- values from either xd3_encode_input or xd3_decode_input except | ||
227 | -- in the case of no-source compression in which case XD3_GETSRCBLK | ||
228 | -- is never returned. More detailed comments for these are given in | ||
229 | -- xd3_encode_input and xd3_decode_input comments below. | ||
230 | newtype Xd3_rvalues = Xd3_rvalues CInt | ||
231 | |||
232 | -- | need input | ||
233 | pattern XD3_INPUT = Xd3_rvalues (#const XD3_INPUT) | ||
234 | |||
235 | -- | have output | ||
236 | pattern XD3_OUTPUT = Xd3_rvalues (#const XD3_OUTPUT) | ||
237 | |||
238 | -- | need a block of source input (with no xd3_getblk function) a chance to do non-blocking read. | ||
239 | pattern XD3_GETSRCBLK = Xd3_rvalues (#const XD3_GETSRCBLK) | ||
240 | |||
241 | -- | (decode-only) after the initial VCDIFF & first window header | ||
242 | pattern XD3_GOTHEADER = Xd3_rvalues (#const XD3_GOTHEADER) | ||
243 | |||
244 | -- | notification: returned before a window is processed giving a chance to XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that window. | ||
245 | pattern XD3_WINSTART = Xd3_rvalues (#const XD3_WINSTART) | ||
246 | |||
247 | -- | notification: returned after encode/decode & output for a window | ||
248 | pattern XD3_WINFINISH = Xd3_rvalues (#const XD3_WINFINISH) | ||
249 | |||
250 | -- | (encoder only) may be returned by getblk() if the block is too old | ||
251 | pattern XD3_TOOFARBACK = Xd3_rvalues (#const XD3_TOOFARBACK) | ||
252 | |||
253 | -- | internal error | ||
254 | pattern XD3_INTERNAL = Xd3_rvalues (#const XD3_INTERNAL) | ||
255 | |||
256 | -- | invalid config | ||
257 | pattern XD3_INVALID = Xd3_rvalues (#const XD3_INVALID) | ||
258 | |||
259 | -- | invalid input/decoder error | ||
260 | pattern XD3_INVALID_INPUT = Xd3_rvalues (#const XD3_INVALID_INPUT) | ||
261 | |||
262 | -- | when secondary compression finds no improvement. | ||
263 | pattern XD3_NOSECOND = Xd3_rvalues (#const XD3_NOSECOND) | ||
264 | |||
265 | -- | currently VCD_TARGET VCD_CODETABLE | ||
266 | pattern XD3_UNIMPLEMENTED = Xd3_rvalues (#const XD3_UNIMPLEMENTED) | ||
267 | |||
268 | |||
269 | |||
270 | |||
271 | -- | special values in config->flags | ||
272 | newtype Xd3_flags = Xd3_flags Word32 | ||
273 | |||
274 | -- used by VCDIFF tools, see xdelta3-main.h.--/ | ||
275 | pattern XD3_JUST_HDR = Xd3_flags (#const XD3_JUST_HDR) | ||
276 | |||
277 | -- used by VCDIFF tools see xdelta3-main.h.--/ | ||
278 | pattern XD3_SKIP_WINDOW = Xd3_flags (#const XD3_SKIP_WINDOW) | ||
279 | |||
280 | -- | used by VCDIFF tools, see xdelta3-main.h. */ | ||
281 | pattern XD3_SKIP_EMIT = Xd3_flags (#const XD3_SKIP_EMIT) | ||
282 | -- | flush the stream buffer to prepare for xd3_stream_close(). */ | ||
283 | pattern XD3_FLUSH = Xd3_flags (#const XD3_FLUSH) | ||
284 | |||
285 | -- | use DJW static huffman */ | ||
286 | pattern XD3_SEC_DJW = Xd3_flags (#const XD3_SEC_DJW) | ||
287 | -- | use FGK adaptive huffman */ | ||
288 | pattern XD3_SEC_FGK = Xd3_flags (#const XD3_SEC_FGK) | ||
289 | -- | use LZMA secondary */ | ||
290 | pattern XD3_SEC_LZMA = Xd3_flags (#const XD3_SEC_LZMA) | ||
291 | |||
292 | pattern XD3_SEC_TYPE = Xd3_flags (#const XD3_SEC_TYPE) | ||
293 | |||
294 | -- | disable secondary compression of the data section. */ | ||
295 | pattern XD3_SEC_NODATA = Xd3_flags (#const XD3_SEC_NODATA) | ||
296 | -- | disable secondary compression of the inst section. */ | ||
297 | pattern XD3_SEC_NOINST = Xd3_flags (#const XD3_SEC_NOINST) | ||
298 | -- | disable secondary compression of the addr section. */ | ||
299 | pattern XD3_SEC_NOADDR = Xd3_flags (#const XD3_SEC_NOADDR) | ||
300 | |||
301 | pattern XD3_SEC_NOALL = Xd3_flags (#const XD3_SEC_NOALL) | ||
302 | |||
303 | -- | enable checksum computation in the encoder. */ | ||
304 | pattern XD3_ADLER32 = Xd3_flags (#const XD3_ADLER32) | ||
305 | -- | disable checksum verification in the decoder. */ | ||
306 | pattern XD3_ADLER32_NOVER = Xd3_flags (#const XD3_ADLER32_NOVER) | ||
307 | |||
308 | -- | disable ordinary data * compression feature, only search * the source, not the target. */ | ||
309 | pattern XD3_NOCOMPRESS = Xd3_flags (#const XD3_NOCOMPRESS) | ||
310 | -- | disable the "1.5-pass * algorithm", instead use greedy * matching. Greedy is off by * default. */ | ||
311 | pattern XD3_BEGREEDY = Xd3_flags (#const XD3_BEGREEDY) | ||
312 | -- | used by "recode". */ | ||
313 | pattern XD3_ADLER32_RECODE = Xd3_flags (#const XD3_ADLER32_RECODE) | ||
314 | |||
315 | -- 4 bits to set the compression level the same as the command-line | ||
316 | -- setting -1 through -9 Xd3_flags (-0 corresponds to the XD3_NOCOMPRESS flag | ||
317 | -- and is independent of compression level). This is for | ||
318 | -- convenience especially with xd3_encode_memoryXd3_flags (). */ | ||
319 | |||
320 | pattern XD3_COMPLEVEL_SHIFT = Xd3_flags (#const XD3_COMPLEVEL_SHIFT) | ||
321 | pattern XD3_COMPLEVEL_MASK = Xd3_flags (#const XD3_COMPLEVEL_MASK) | ||
322 | pattern XD3_COMPLEVEL_1 = Xd3_flags (#const XD3_COMPLEVEL_1) | ||
323 | pattern XD3_COMPLEVEL_2 = Xd3_flags (#const XD3_COMPLEVEL_2) | ||
324 | pattern XD3_COMPLEVEL_3 = Xd3_flags (#const XD3_COMPLEVEL_3) | ||
325 | pattern XD3_COMPLEVEL_6 = Xd3_flags (#const XD3_COMPLEVEL_6) | ||
326 | pattern XD3_COMPLEVEL_9 = Xd3_flags (#const XD3_COMPLEVEL_9) | ||
327 | |||
328 | -- | The values of this enumeration are set in xd3_config using the | ||
329 | -- smatch_cfg variable. It can be set to default, slow, fast, etc., | ||
330 | -- and soft. | ||
331 | data Xd3_smatch_cfg | ||
332 | = XD3_SMATCH_DEFAULT -- ^ Flags may contain XD3_COMPLEVEL bits, else default. | ||
333 | | XD3_SMATCH_SLOW | ||
334 | | XD3_SMATCH_FAST | ||
335 | | XD3_SMATCH_FASTER | ||
336 | | XD3_SMATCH_FASTEST | ||
337 | | XD3_SMATCH_SOFT | ||
338 | deriving Enum | ||
339 | |||
340 | #if 0 | ||
341 | |||
342 | ---------------------------------------------------------------------- | ||
343 | -- PRIVATE ENUMS | ||
344 | ---------------------------------------------------------------------- | ||
345 | |||
346 | -- | stream->match_state is part of the xd3_encode_input state machine | ||
347 | -- for source matching: | ||
348 | -- | ||
349 | -- 1. the XD3_GETSRCBLK block-read mechanism means reentrant matching | ||
350 | -- 2. this state spans encoder windows: a match and end-of-window | ||
351 | -- will continue in the next 3. the initial target byte and source | ||
352 | -- byte are a presumed match, to avoid some computation in case the | ||
353 | -- inputs are identical. | ||
354 | data Xd3_match_state | ||
355 | = MATCH_TARGET -- ^ in this state, attempt to match the start of the | ||
356 | -- target with the previously set source address (initially | ||
357 | -- 0). | ||
358 | | MATCH_BACKWARD -- ^ currently expanding a match backward in the | ||
359 | -- source/target. | ||
360 | | MATCH_FORWARD -- ^ currently expanding a match forward in the | ||
361 | -- source/target. | ||
362 | | MATCH_SEARCHING -- ^ currently searching for a match. | ||
363 | deriving Enum | ||
364 | |||
365 | |||
366 | -- The xd3_encode_input state machine steps through these states in | ||
367 | -- the following order. The matcher is reentrant and returns | ||
368 | -- XD3_INPUT whenever it requires more data. After receiving | ||
369 | -- XD3_INPUT, if the application reads EOF it should call | ||
370 | -- xd3_stream_close(). | ||
371 | data Xd3_encode_input | ||
372 | = ENC_INIT -- ^ xd3_encode_input has never been called. | ||
373 | | ENC_INPUT -- ^ waiting for xd3_avail_input () to be called. | ||
374 | | ENC_SEARCH -- ^ currently searching for matches. | ||
375 | | ENC_INSTR -- ^ currently formatting output. | ||
376 | | ENC_FLUSH -- ^ currently emitting output. | ||
377 | | ENC_POSTOUT -- ^ after an output section. | ||
378 | | ENC_POSTWIN -- ^ after all output sections. | ||
379 | | ENC_ABORTED -- ^ abort. | ||
380 | deriving Enum | ||
381 | |||
382 | -- | The xd3_decode_input state machine steps through these states in | ||
383 | -- the following order. The matcher is reentrant and returns | ||
384 | -- XD3_INPUT whenever it requires more data. After receiving | ||
385 | -- XD3_INPUT, if the application reads EOF it should call | ||
386 | -- xd3_stream_close(). | ||
387 | -- | ||
388 | -- 0-8: the VCDIFF header | ||
389 | -- 9-18: the VCDIFF window header | ||
390 | -- 19-21: the three primary sections: data, inst, addr | ||
391 | -- 22: producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK, | ||
392 | -- 23: return XD3_WINFINISH, set state=9 to decode more input | ||
393 | data Xd3_decode_state | ||
394 | = DEC_VCHEAD -- ^ VCDIFF header | ||
395 | | DEC_HDRIND -- ^ header indicator | ||
396 | |||
397 | | DEC_SECONDID -- ^ secondary compressor ID | ||
398 | |||
399 | | DEC_TABLEN -- ^ code table length | ||
400 | | DEC_NEAR -- ^ code table near | ||
401 | | DEC_SAME -- ^ code table same | ||
402 | | DEC_TABDAT -- ^ code table data | ||
403 | |||
404 | | DEC_APPLEN -- ^ application data length | ||
405 | | DEC_APPDAT -- ^ application data | ||
406 | |||
407 | | DEC_WININD -- ^ window indicator | ||
408 | |||
409 | | DEC_CPYLEN -- ^ copy window length | ||
410 | | DEC_CPYOFF -- ^ copy window offset | ||
411 | |||
412 | | DEC_ENCLEN -- ^ length of delta encoding | ||
413 | | DEC_TGTLEN -- ^ length of target window | ||
414 | | DEC_DELIND -- ^ delta indicator | ||
415 | |||
416 | | DEC_DATALEN -- ^ length of ADD+RUN data | ||
417 | | DEC_INSTLEN -- ^ length of instruction data | ||
418 | | DEC_ADDRLEN -- ^ length of address data | ||
419 | |||
420 | | DEC_CKSUM -- ^ window checksum | ||
421 | |||
422 | | DEC_DATA -- ^ data section | ||
423 | | DEC_INST -- ^ instruction section | ||
424 | | DEC_ADDR -- ^ address section | ||
425 | |||
426 | | DEC_EMIT -- ^ producing data | ||
427 | |||
428 | | DEC_FINISH -- ^ window finished | ||
429 | |||
430 | | DEC_ABORTED -- ^ xd3_abort_stream | ||
431 | deriving Enum | ||
432 | |||
433 | ------------------------------------------------------------- | ||
434 | -- internal types | ||
435 | -------------------------------------------------------------- | ||
436 | |||
437 | -- skipped. | ||
438 | |||
439 | #endif | ||
440 | |||
441 | --------------------------------------------------------------------- | ||
442 | -- public types | ||
443 | --------------------------------------------------------------------- | ||
444 | |||
445 | -- | Settings for the secondary compressor. | ||
446 | data Xd3_sec_cfg = Xd3_sec_cfg | ||
447 | { data_type :: Int -- ^ Which section. (set automatically) | ||
448 | , ngroups :: Usize_t -- ^ Number of DJW Huffman groups. | ||
449 | , sector_size :: Usize_t -- ^ Sector size. | ||
450 | , inefficient :: Int -- ^ If true, ignore efficiency check [avoid XD3_NOSECOND]. | ||
451 | } | ||
452 | |||
453 | -- | This is the user-visible stream configuration. */ | ||
454 | data Xd3_config = Xd3_config | ||
455 | { winsize :: Usize_t -- ^ The encoder window size. | ||
456 | , sprevsz :: Usize_t -- ^ How far back small string matching goes | ||
457 | , iopt_size :: Usize_t -- ^ entries in the instruction-optimizing buffer | ||
458 | |||
459 | -- xd3_getblk_func *getblk; /* The three callbacks. */ | ||
460 | -- xd3_alloc_func *alloc; | ||
461 | -- xd3_free_func *freef; | ||
462 | -- void *opaque; /* Not used. */ | ||
463 | |||
464 | , flags :: Xd3_flags -- ^ stream->flags are initialized from xd3_config & never modified by the library. Use xd3_set_flags to modify flags settings mid-stream. | ||
465 | , sec_data :: Xd3_sec_cfg -- ^ Secondary compressor config: data | ||
466 | , sec_inst :: Xd3_sec_cfg -- ^ Secondary compressor config: inst | ||
467 | , sec_addr :: Xd3_sec_cfg -- ^ Secondary compressor config: addr | ||
468 | , smatch_cfg :: Xd3_smatch_cfg -- ^ See enum: use fields below for soft config | ||
469 | , smatcher_soft :: Xd3_smatcher | ||
470 | } | ||
471 | |||
472 | -- | The primary source file object. You create one of these objects and | ||
473 | -- initialize the first four fields. This library maintains the next | ||
474 | -- 5 fields. The configured getblk implementation is responsible for | ||
475 | -- setting the final 3 fields when called (and/or when XD3_GETSRCBLK | ||
476 | -- is returned). | ||
477 | data Xd3_source = Xd3_source | ||
478 | { | ||
479 | -- you set | ||
480 | blksize :: Usize_t -- ^ block size | ||
481 | , name :: String -- ^ its name, for debug/print purposes | ||
482 | , ioh :: Ptr () -- ^ opaque handle */ | ||
483 | , max_winsize :: Xoff_t -- ^ maximum visible buffer | ||
484 | |||
485 | -- getblk sets | ||
486 | , curblkno :: Xoff_t -- ^ current block number: client sets after getblk request | ||
487 | , onblk :: Usize_t -- ^ number of bytes on current block: client sets, must be >= 0 and <= blksize | ||
488 | , curblk :: Ptr Word8 -- ^ current block array: client sets after getblk request */ | ||
489 | |||
490 | -- xd3 sets | ||
491 | , srclen :: Usize_t -- ^ length of this source window | ||
492 | , srcbase :: Xoff_t -- ^ offset of this source window in the source itself | ||
493 | , shiftby :: Usize_t -- ^ for power-of-two blocksizes | ||
494 | , maskby :: Usize_t -- ^ for power-of-two blocksizes | ||
495 | , cpyoff_blocks :: Xoff_t -- ^ offset of dec_cpyoff in blocks | ||
496 | , cpyoff_blkoff :: Usize_t -- ^ offset of copy window in blocks, remainder | ||
497 | , getblkno :: Xoff_t -- ^ request block number: xd3 sets current getblk request | ||
498 | |||
499 | -- See xd3_getblk() | ||
500 | , max_blkno :: Xoff_t -- ^ Maximum block, if eof is known, otherwise, equals frontier_blkno (initially 0). | ||
501 | , onlastblk :: Usize_t -- ^ Number of bytes on max_blkno | ||
502 | , eof_known :: Int -- ^ Set to true when the first partial block is read. | ||
503 | } | ||
504 | |||
505 | -- | The primary xd3_stream object, used for encoding and decoding. You | ||
506 | -- may access only two fields: avail_out, next_out. Use the methods | ||
507 | -- above to operate on xd3_stream. | ||
508 | |||
509 | data Xd3_stream = Xd3_stream | ||
510 | { | ||
511 | -- input state */ | ||
512 | next_in :: Ptr Word8 -- ^ next input byte */ | ||
513 | , avail_in :: Usize_t -- ^ number of bytes available at next_in | ||
514 | , total_in :: Xoff_t -- ^ how many bytes in | ||
515 | |||
516 | -- output state */ | ||
517 | , next_out :: Ptr Word8 -- ^ next output byte */ | ||
518 | , avail_out :: Usize_t -- ^ number of bytes available at next_out | ||
519 | , space_out :: Usize_t -- ^ total out space | ||
520 | , current_window :: Xoff_t -- ^ number of windows encoded/decoded | ||
521 | , total_out :: Xoff_t -- ^ how many bytes out | ||
522 | |||
523 | -- to indicate an error, xd3 sets */ | ||
524 | , msg :: Maybe String -- last error message, NULL if no error */ | ||
525 | |||
526 | -- source configuration */ | ||
527 | , src :: Xd3_source -- ^ source array */ | ||
528 | |||
529 | -- encoder memory configuration */ | ||
530 | -- , winsize :: Usize_t -- ^ suggested window size | ||
531 | -- , sprevsz :: Usize_t -- ^ small string, previous window size (power of 2) | ||
532 | , sprevmask :: Usize_t -- ^ small string, previous window size mask | ||
533 | -- , iopt_size :: Usize_t | ||
534 | , iopt_unlimited :: Usize_t | ||
535 | |||
536 | -- general configuration */ | ||
537 | -- xd3_getblk_func *getblk; /* set nxtblk, nxtblkno to scanblkno */ | ||
538 | -- xd3_alloc_func *alloc; /* malloc function */ | ||
539 | -- xd3_free_func *free; /* free function */ | ||
540 | , opaque :: Ptr () -- ^ private data object passed to alloc, free, and getblk | ||
541 | -- , flags :: Uint32_t -- ^ various options | ||
542 | |||
543 | -- secondary compressor configuration */ | ||
544 | -- , sec_data :: Xd3_sec_cfg -- ^ Secondary compressor config: data | ||
545 | -- , sec_inst :: Xd3_sec_cfg -- ^ Secondary compressor config: inst | ||
546 | -- , sec_addr :: Xd3_sec_cfg -- ^ Secondary compressor config: addr | ||
547 | |||
548 | , smatcher :: Xd3_smatcher | ||
549 | |||
550 | {- | ||
551 | |||
552 | usize_t *large_table; /* table of large checksums */ | ||
553 | , large_hash :: xd3_hash_cfg -- ^ large hash config | ||
554 | |||
555 | usize_t *small_table; /* table of small checksums */ | ||
556 | xd3_slist *small_prev; /* table of previous offsets, circular linked list */ | ||
557 | , small_reset :: int -- ^ true if small table should be reset | ||
558 | |||
559 | , small_hash :: xd3_hash_cfg -- ^ small hash config | ||
560 | , acache :: xd3_addr_cache -- ^ the vcdiff address cache | ||
561 | , enc_state :: xd3_encode_state -- ^ state of the encoder | ||
562 | |||
563 | , taroff :: usize_t -- ^ base offset of the target input | ||
564 | , input_position :: usize_t -- ^ current input position | ||
565 | , min_match :: usize_t -- ^ current minimum match length, avoids redundent matches | ||
566 | , unencoded_offset :: usize_t -- ^ current input, first * unencoded offset. this value is <= the first instruction's position in the iopt buffer, if there is at least one match in the buffer. | ||
567 | |||
568 | /* SRCWIN */ | ||
569 | , srcwin_decided :: int -- ^ boolean: true if srclen and srcbase have been decided. | ||
570 | , srcwin_decided_early :: int -- ^ boolean: true if srclen and srcbase were decided early. | ||
571 | , srcwin_cksum_pos :: xoff_t -- ^ Source checksum position | ||
572 | |||
573 | /* MATCH */ | ||
574 | , match_state :: xd3_match_state -- ^ encoder match state | ||
575 | , match_srcpos :: xoff_t -- ^ current match source position relative to srcbase | ||
576 | , match_last_srcpos :: xoff_t -- ^ previously attempted srcpos, to avoid loops. | ||
577 | , match_minaddr :: xoff_t -- ^ smallest matching address to set window params (reset each window xd3_encode_reset) | ||
578 | , match_maxaddr :: xoff_t -- ^ largest matching address to set window params (reset each window xd3_encode_reset) | ||
579 | , match_back :: usize_t -- ^ match extends back so far | ||
580 | , match_maxback :: usize_t -- ^ match extends back maximum | ||
581 | , match_fwd :: usize_t -- ^ match extends forward so far | ||
582 | , match_maxfwd :: usize_t -- ^ match extends forward maximum | ||
583 | |||
584 | , maxsrcaddr :: xoff_t -- ^ address of the last source match (across windows) | ||
585 | |||
586 | uint8_t *buf_in; /* for saving buffered input */ | ||
587 | , buf_avail :: usize_t -- ^ amount of saved input | ||
588 | const uint8_t *buf_leftover; /* leftover content of next_in (i.e., user's buffer) */ | ||
589 | , buf_leftavail :: usize_t -- ^ amount of leftover content | ||
590 | |||
591 | xd3_output *enc_current; /* current output buffer */ | ||
592 | xd3_output *enc_free; /* free output buffers */ | ||
593 | xd3_output *enc_heads[4]; /* array of encoded outputs: head of chain */ | ||
594 | xd3_output *enc_tails[4]; /* array of encoded outputs: tail of chain */ | ||
595 | uint32_t recode_adler32; /* set the adler32 checksum * during "recode". */ | ||
596 | |||
597 | , iopt_used :: xd3_rlist -- ^ instruction optimizing buffer | ||
598 | xd3_rlist iopt_free; | ||
599 | xd3_rinst *iout; /* next single instruction */ | ||
600 | xd3_iopt_buflist *iopt_alloc; | ||
601 | |||
602 | const uint8_t *enc_appheader; /* application header to encode */ | ||
603 | usize_t enc_appheadsz; /* application header size */ | ||
604 | |||
605 | /* decoder stuff */ | ||
606 | , dec_state :: xd3_decode_state -- ^ current DEC_XXX value | ||
607 | , dec_hdr_ind :: usize_t -- ^ VCDIFF header indicator | ||
608 | , dec_win_ind :: usize_t -- ^ VCDIFF window indicator | ||
609 | , dec_del_ind :: usize_t -- ^ VCDIFF delta indicator | ||
610 | |||
611 | uint8_t dec_magic[4]; /* First four bytes */ | ||
612 | , dec_magicbytes :: usize_t -- ^ Magic position. | ||
613 | |||
614 | , dec_secondid :: usize_t -- ^ Optional secondary compressor ID. | ||
615 | |||
616 | , dec_codetblsz :: usize_t -- ^ Optional code table: length. | ||
617 | uint8_t *dec_codetbl; /* Optional code table: storage. */ | ||
618 | , dec_codetblbytes :: usize_t -- ^ Optional code table: position. | ||
619 | |||
620 | , dec_appheadsz :: usize_t -- ^ Optional application header: size. | ||
621 | uint8_t *dec_appheader; /* Optional application header: storage */ | ||
622 | , dec_appheadbytes :: usize_t -- ^ Optional application header: position. | ||
623 | |||
624 | , dec_cksumbytes :: usize_t -- ^ Optional checksum: position. | ||
625 | uint8_t dec_cksum[4]; /* Optional checksum: storage. */ | ||
626 | uint32_t dec_adler32; /* Optional checksum: value. */ | ||
627 | |||
628 | , dec_cpylen :: usize_t -- ^ length of copy window (VCD_SOURCE or VCD_TARGET) | ||
629 | , dec_cpyoff :: xoff_t -- ^ offset of copy window (VCD_SOURCE or VCD_TARGET) | ||
630 | , dec_enclen :: usize_t -- ^ length of delta encoding | ||
631 | , dec_tgtlen :: usize_t -- ^ length of target window | ||
632 | |||
633 | #if USE_UINT64 | ||
634 | uint64_t dec_64part; /* part of a decoded uint64_t */ | ||
635 | #endif | ||
636 | #if USE_UINT32 | ||
637 | uint32_t dec_32part; /* part of a decoded uint32_t */ | ||
638 | #endif | ||
639 | |||
640 | , dec_winstart :: xoff_t -- ^ offset of the start of current target window | ||
641 | , dec_window_count :: xoff_t -- ^ == current_window + 1 in DEC_FINISH | ||
642 | , dec_winbytes :: usize_t -- ^ bytes of the three sections so far consumed | ||
643 | , dec_hdrsize :: usize_t -- ^ VCDIFF + app header size | ||
644 | |||
645 | const uint8_t *dec_tgtaddrbase; /* Base of decoded target addresses (addr >= dec_cpylen). */ | ||
646 | const uint8_t *dec_cpyaddrbase; /* Base of decoded copy addresses (addr < dec_cpylen). */ | ||
647 | |||
648 | , dec_position :: usize_t -- ^ current decoder position counting the cpylen offset | ||
649 | , dec_maxpos :: usize_t -- ^ maximum decoder position counting the cpylen offset | ||
650 | xd3_hinst dec_current1; /* current instruction */ | ||
651 | xd3_hinst dec_current2; /* current instruction */ | ||
652 | |||
653 | uint8_t *dec_buffer; /* Decode buffer */ | ||
654 | uint8_t *dec_lastwin; /* In case of VCD_TARGET, the last target window. */ | ||
655 | , dec_lastlen :: usize_t -- ^ length of the last target window | ||
656 | , dec_laststart :: xoff_t -- ^ offset of the start of last target window | ||
657 | , dec_lastspace :: usize_t -- ^ allocated space of last target window, for reuse | ||
658 | |||
659 | , inst_sect :: xd3_desect -- ^ staging area for decoding window sections | ||
660 | xd3_desect addr_sect; | ||
661 | xd3_desect data_sect; | ||
662 | |||
663 | xd3_code_table_func *code_table_func; | ||
664 | const xd3_dinst *code_table; | ||
665 | const xd3_code_table_desc *code_table_desc; | ||
666 | xd3_dinst *code_table_alloc; | ||
667 | |||
668 | /* secondary compression */ | ||
669 | const xd3_sec_type *sec_type; | ||
670 | xd3_sec_stream *sec_stream_d; | ||
671 | xd3_sec_stream *sec_stream_i; | ||
672 | xd3_sec_stream *sec_stream_a; | ||
673 | |||
674 | /* state for reconstructing whole files (e.g., for merge), this only | ||
675 | * supports loading USIZE_T_MAX instructions, adds, etc. */ | ||
676 | xd3_whole_state whole_target; | ||
677 | |||
678 | /* statistics */ | ||
679 | xoff_t n_scpy; | ||
680 | xoff_t n_tcpy; | ||
681 | xoff_t n_add; | ||
682 | xoff_t n_run; | ||
683 | |||
684 | xoff_t l_scpy; | ||
685 | xoff_t l_tcpy; | ||
686 | xoff_t l_add; | ||
687 | xoff_t l_run; | ||
688 | |||
689 | usize_t i_slots_used; | ||
690 | |||
691 | #if XD3_DEBUG | ||
692 | usize_t large_ckcnt; | ||
693 | |||
694 | /* memory usage */ | ||
695 | usize_t alloc_cnt; | ||
696 | usize_t free_cnt; | ||
697 | #endif | ||
698 | -} | ||
699 | |||
700 | } | ||
701 | |||
702 | --------------------------------------------------------------------------- | ||
703 | -- PUBLIC FUNCTIONS | ||
704 | ---------------------------------------------------------------------------- | ||
705 | |||
706 | -- | This function configures an xd3_stream using the provided in-memory | ||
707 | -- input buffer, source buffer, output buffer, and flags. The output | ||
708 | -- array must be large enough or else ENOSPC will be returned. This | ||
709 | -- is the simplest in-memory encoding interface. | ||
710 | foreign import ccall "xdelta3.h xd3_encode_memory" | ||
711 | xd3_encode_memory :: Ptr Word8 -- input | ||
712 | -> Usize_t -- input_size | ||
713 | -> Ptr Word8 -- source | ||
714 | -> Usize_t -- source_size | ||
715 | -> Ptr Word8 -- output_buffer | ||
716 | -> Ptr Usize_t -- output_size | ||
717 | -> Usize_t -- avail_output | ||
718 | -> Int -- flags | ||
719 | -> IO CInt | ||
720 | |||
721 | -- | The reverse of xd3_encode_memory. | ||
722 | foreign import ccall "xdelta3.h xd3_decode_memory" | ||
723 | xd3_decode_memory :: Ptr Word8 -- input, | ||
724 | -> Usize_t -- input_size, | ||
725 | -> Ptr Word8 -- source, | ||
726 | -> Usize_t -- source_size, | ||
727 | -> Ptr Word8 -- output_buf, | ||
728 | -> Ptr Usize_t -- output_size, | ||
729 | -> Usize_t -- avail_output, | ||
730 | -> Int -- flags | ||
731 | -> IO CInt | ||
732 | |||
733 | -- | This function encodes an in-memory input using a pre-configured | ||
734 | -- xd3_stream. This allows the caller to set a variety of options | ||
735 | -- which are not available in the xd3_encode/decode_memory() | ||
736 | -- functions. | ||
737 | -- | ||
738 | -- The output array must be large enough to hold the output or else | ||
739 | -- ENOSPC is returned. The source (if any) should be set using | ||
740 | -- xd3_set_source_and_size() with a single-block xd3_source. This | ||
741 | -- calls the underlying non-blocking interfaces, | ||
742 | -- xd3_encode/decode_input(), handling the necessary input/output | ||
743 | -- states. This method may be considered a reference for any | ||
744 | -- application using xd3_encode_input() directly. | ||
745 | -- | ||
746 | -- xd3_stream stream; | ||
747 | -- xd3_config config; | ||
748 | -- xd3_source src; | ||
749 | -- | ||
750 | -- memset (& src, 0, sizeof (src)); | ||
751 | -- memset (& stream, 0, sizeof (stream)); | ||
752 | -- memset (& config, 0, sizeof (config)); | ||
753 | -- | ||
754 | -- if (source != NULL) | ||
755 | -- { | ||
756 | -- src.size = source_size; | ||
757 | -- src.blksize = source_size; | ||
758 | -- src.curblkno = 0; | ||
759 | -- src.onblk = source_size; | ||
760 | -- src.curblk = source; | ||
761 | -- src.max_winsize = source_size; | ||
762 | -- xd3_set_source(&stream, &src); | ||
763 | -- } | ||
764 | -- | ||
765 | -- config.flags = flags; | ||
766 | -- config.winsize = input_size; | ||
767 | -- | ||
768 | -- ... set smatcher, appheader, encoding-table, compression-level, etc. | ||
769 | -- | ||
770 | -- xd3_config_stream(&stream, &config); | ||
771 | -- xd3_encode_stream(&stream, ...); | ||
772 | -- xd3_free_stream(&stream); | ||
773 | foreign import ccall "xdelta3.h xd3_encode_stream" | ||
774 | xd3_encode_stream :: Ptr Xd3_stream -- stream, | ||
775 | -> Ptr Word8 -- input, | ||
776 | -> Usize_t -- input_size, | ||
777 | -> Ptr Word8 -- output, | ||
778 | -> Ptr Usize_t -- output_size, | ||
779 | -> Usize_t -- avail_output); | ||
780 | -> IO Int | ||
781 | |||
782 | -- | The reverse of xd3_encode_stream. | ||
783 | foreign import ccall "xdelta3.h xd3_decode_stream" | ||
784 | xd3_decode_stream :: Ptr Xd3_stream -- stream, | ||
785 | -> Ptr Word8 -- input, | ||
786 | -> Usize_t -- input_size, | ||
787 | -> Ptr Word8 -- output, | ||
788 | -> Ptr Usize_t -- output_size, | ||
789 | -> Usize_t -- avail_size); | ||
790 | -> IO Int | ||
791 | |||
792 | -- | This is the non-blocking interface. | ||
793 | -- | ||
794 | -- Handling input and output states is the same for encoding or | ||
795 | -- decoding using the xd3_avail_input() and xd3_consume_output() | ||
796 | -- routines, inlined below. | ||
797 | -- | ||
798 | -- Return values: | ||
799 | -- | ||
800 | -- XD3_INPUT: the process requires more input: call | ||
801 | -- xd3_avail_input() then repeat | ||
802 | -- | ||
803 | -- XD3_OUTPUT: the process has more output: read stream->next_out, | ||
804 | -- stream->avail_out, then call xd3_consume_output(), | ||
805 | -- then repeat | ||
806 | -- | ||
807 | -- XD3_GOTHEADER: (decoder-only) notification returned following the | ||
808 | -- VCDIFF header and first window header. the decoder | ||
809 | -- may use the header to configure itself. | ||
810 | -- | ||
811 | -- XD3_WINSTART: a general notification returned once for each | ||
812 | -- window except the 0-th window, which is implied by | ||
813 | -- XD3_GOTHEADER. It is recommended to use a | ||
814 | -- switch-stmt such as: | ||
815 | -- | ||
816 | -- ... | ||
817 | -- again: | ||
818 | -- switch ((ret = xd3_decode_input (stream))) { | ||
819 | -- case XD3_GOTHEADER: { | ||
820 | -- assert(stream->current_window == 0); | ||
821 | -- stuff; | ||
822 | -- } | ||
823 | -- // fallthrough | ||
824 | -- case XD3_WINSTART: { | ||
825 | -- something(stream->current_window); | ||
826 | -- goto again; | ||
827 | -- } | ||
828 | -- ... | ||
829 | -- | ||
830 | -- XD3_WINFINISH: a general notification, following the complete | ||
831 | -- input & output of a window. at this point, | ||
832 | -- stream->total_in and stream->total_out are consistent | ||
833 | -- for either encoding or decoding. | ||
834 | -- | ||
835 | -- XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value | ||
836 | -- is returned to initiate a non-blocking source read. | ||
837 | foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input :: Ptr Xd3_stream -> IO Xd3_rvalues | ||
838 | foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input :: Ptr Xd3_stream -> IO Xd3_rvalues | ||
839 | |||
840 | -- | The xd3_config structure is used to initialize a stream - all data | ||
841 | -- is copied into stream so config may be a temporary variable. See | ||
842 | -- the [documentation] or comments on the xd3_config structure. | ||
843 | foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Xd3_stream -> Ptr xd3_config -> IO Xd3_rvalues | ||
844 | |||
845 | -- | Since Xdelta3 doesn't open any files, xd3_close_stream is just an | ||
846 | -- error check that the stream is in a proper state to be closed: this | ||
847 | -- means the encoder is flushed and the decoder is at a window | ||
848 | -- boundary. The application is responsible for freeing any of the | ||
849 | -- resources it supplied. | ||
850 | foreign import ccall "xdelta3.h xd3_close_stream" xd3_close_stream :: Ptr Xd3_stream -> IO Xd3_rvalues | ||
851 | |||
852 | -- | This arranges for closes the stream to succeed. Does not free the | ||
853 | -- stream. | ||
854 | foreign import ccall "xdelta3.h xd3_abort_stream" xd3_abort_stream :: Ptr Xd3_stream -> IO () | ||
855 | |||
856 | -- | xd3_free_stream frees all memory allocated for the stream. The | ||
857 | -- application is responsible for freeing any of the resources it | ||
858 | -- supplied. | ||
859 | foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Xd3_stream -> IO () | ||
860 | |||
861 | -- | This function informs the encoder or decoder that source matching | ||
862 | -- (i.e., delta-compression) is possible. For encoding, this should | ||
863 | -- be called before the first xd3_encode_input. A NULL source is | ||
864 | -- ignored. For decoding, this should be called before the first | ||
865 | -- window is decoded, but the appheader may be read first | ||
866 | -- (XD3_GOTHEADER). After decoding the header, call xd3_set_source() | ||
867 | -- if you have a source file. Note: if (stream->dec_win_ind & VCD_SOURCE) | ||
868 | -- is true, it means the first window expects there to be a source file. | ||
869 | foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Xd3_stream -> Ptr Xd3_source -> IO CInt | ||
870 | |||
871 | -- | If the source size is known, call this instead of xd3_set_source(). | ||
872 | -- to avoid having stream->getblk called (and/or to avoid XD3_GETSRCBLK). | ||
873 | -- | ||
874 | -- Follow these steps: | ||
875 | -- | ||
876 | -- > xd3_source source; | ||
877 | -- > memset(&source, 0, sizeof(source)); | ||
878 | -- > source.blksize = size; | ||
879 | -- > source.onblk = size; | ||
880 | -- > source.curblk = buf; | ||
881 | -- > source.curblkno = 0; | ||
882 | -- > int ret = xd3_set_source_and_size(&stream, &source, size); | ||
883 | -- > ... | ||
884 | foreign import ccall "xdelta3.h xd3_set_source_and_size" xd3_set_source_and_size :: Ptr Xd3_stream -> Ptr Xd3_source -> Xoff_t -> IO CInt | ||
885 | |||
886 | -- | This should be called before the first call to xd3_encode_input() | ||
887 | -- to include application-specific data in the VCDIFF header. */ | ||
888 | foreign import ccall "xdelta3.h xd3_set_appheader" xd3_set_appheader :: Ptr Xd3_stream -> Ptr Word8 -> Usize_t -> IO () | ||
889 | |||
890 | -- xd3_get_appheader may be called in the decoder after XD3_GOTHEADER. | ||
891 | -- For convenience, the decoder always adds a single byte padding to | ||
892 | -- the end of the application header, which is set to zero in case the | ||
893 | -- application header is a string. | ||
894 | foreign import ccall "xdelta3.h xd3_get_appheader" xd3_get_appheader :: Ptr Xd3_stream -> Ptr (Ptr Word8) -> Ptr Usize_t -> IO Xd3_rvalues | ||
895 | |||
896 | -- | To generate a VCDIFF encoded delta with xd3_encode_init() from | ||
897 | -- another format, use: | ||
898 | -- | ||
899 | -- xd3_encode_init_partial() -- initialze encoder state (w/o hash tables) | ||
900 | -- xd3_init_cache() -- reset VCDIFF address cache | ||
901 | -- xd3_found_match() -- to report a copy instruction | ||
902 | -- | ||
903 | -- set stream->enc_state to ENC_INSTR and call xd3_encode_input as usual. | ||
904 | foreign import ccall "xdelta3.h xd3_encode_init_partial" xd3_encode_init_partial :: Ptr Xd3_stream -> IO Xd3_rvalues | ||
905 | foreign import ccall "xdelta3.h xd3_init_cache" xd3_init_cache :: Ptr Xd3_addr_cache -> IO () | ||
906 | foreign import ccall "xdelta3.h xd3_found_match" xd3_found_match :: Ptr Xd3_stream -> Usize_t -> Usize_t -> Xoff_t -> CInt -> IO Xd3_rvalues | ||
907 | |||
908 | |||
909 | -- | Gives an error string for xdelta3-speficic errors, returns NULL for | ||
910 | -- system errors | ||
911 | foreign import ccall "xdelta3.h xd3_strerror" xd3_strerror :: Xd3_rvalues -> Ptr CChar | ||
912 | |||
913 | |||
914 | -- For convenience, zero & initialize the xd3_config structure with | ||
915 | -- specified flags. */ | ||
916 | foreign import ccall "xdelta3.h xd3_init_config" xd3_init_config :: Ptr Xd3_config -> Word32 -> IO () | ||
917 | |||
918 | -- | This supplies some input to the stream. | ||
919 | -- | ||
920 | -- For encoding, if the input is larger than the configured window | ||
921 | -- size (xd3_config.winsize), the entire input will be consumed and | ||
922 | -- encoded anyway. If you wish to strictly limit the window size, | ||
923 | -- limit the buffer passed to xd3_avail_input to the window size. | ||
924 | -- | ||
925 | -- For encoding, if the input is smaller than the configured window | ||
926 | -- size (xd3_config.winsize), the library will create a window-sized | ||
927 | -- buffer and accumulate input until a full-sized window can be | ||
928 | -- encoded. XD3_INPUT will be returned. The input must remain valid | ||
929 | -- until the next time xd3_encode_input() returns XD3_INPUT. | ||
930 | -- | ||
931 | -- For decoding, the input will be consumed entirely before XD3_INPUT | ||
932 | -- is returned again. | ||
933 | foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3_stream -> Ptr Word8 -> Usize_t -> IO () | ||
934 | |||
935 | -- | This acknowledges receipt of output data, must be called after any | ||
936 | -- XD3_OUTPUT return. | ||
937 | foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3_stream -> IO () | ||
938 | |||
939 | -- | These are set for each XD3_WINFINISH return. | ||
940 | foreign import ccall "xdelta3.h xd3_encoder_used_source" xd3_encoder_used_source :: Ptr Xd3_stream -> IO Bool | ||
941 | foreign import ccall "xdelta3.h xd3_encoder_srcbase" xd3_encoder_srcbase :: Ptr xd3_stream -> IO Xoff_t | ||
942 | foreign import ccall "xdelta3.h xd3_encoder_srclen" xd3_encoder_srclen :: Ptr Xd3_stream -> IO Usize_t | ||
943 | |||
944 | |||
945 | -- | Checks for legal flag changes. | ||
946 | foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3_stream -> Xd3_flags -> IO () | ||
947 | |||
948 | -- | Gives some extra information about the latest library error, if any | ||
949 | -- is known. | ||
950 | foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3_stream -> CString | ||
951 | |||
952 | -- | 64-bit divisions are expensive, which is why we require a | ||
953 | -- power-of-two source->blksize. To relax this restriction is | ||
954 | -- relatively easy, see the history for xd3_blksize_div(). */ | ||
955 | foreign import ccall "xdelta3.h xd3_blksize_div" xd3_blksize_div :: Xoff_t -> Ptr Xd3_source -> Ptr Xoff_t -> Usize_t -> IO () | ||
956 | |||
957 | foreign import ccall "xdelta3.h xd3_blksize_add" xd3_blksize_add :: Ptr Xoff_t -> Ptr Usize_t -> Ptr Xd3_source -> Usize_t -> IO () | ||
958 | |||
959 | pattern XD3_NOOP = #const XD3_NOOP | ||
960 | pattern XD3_ADD = #const XD3_ADD | ||
961 | pattern XD3_RUN = #const XD3_RUN | ||
962 | pattern XD3_CPY = #const XD3_CPY | ||
diff --git a/xdelta3.cabal b/xdelta3.cabal new file mode 100644 index 0000000..d7368bd --- /dev/null +++ b/xdelta3.cabal | |||
@@ -0,0 +1,36 @@ | |||
1 | cabal-version: 2.2 | ||
2 | name: xdelta | ||
3 | version: 3.2.0 | ||
4 | synopsis: VCDIFF encoder/decoder. | ||
5 | -- description: | ||
6 | homepage: xdelta.org | ||
7 | license: Apache-2.0 | ||
8 | license-file: xdelta3/LICENSE | ||
9 | author: Josh MacDonald | ||
10 | maintainer: josh.macdonald@gmail.com | ||
11 | -- copyright: | ||
12 | category: Data | ||
13 | -- build-type: Custom | ||
14 | build-type: Simple | ||
15 | |||
16 | extra-source-files: xdelta3/*.h xdelta3/*.c | ||
17 | |||
18 | library | ||
19 | exposed-modules: XDelta | ||
20 | |||
21 | build-tools: hsc2hs | ||
22 | -- include-dirs: xdelta3 xdelta3_lib | ||
23 | -- extra-lib-dirs: xdelta3_lib | ||
24 | include-dirs: haskell . | ||
25 | -- cc-options: -std=c++14 -Wno-literal-suffix | ||
26 | cxx-options: -Wno-literal-suffix | ||
27 | cpp-options: -DNOT_MAIN=1 -DREGRESSION_TEST=1 -DSECONDARY_DJW=1 -DSECONDARY_FGK=1 -DXD3_MAIN=1 -DXD3_DEBUG=0 -DHAVE_CONFIG | ||
28 | -- cpp-options: -DHAVE_CONFIG_H -DSIZEOF_SIZE_T=__SIZEOF_SIZE_T__ -DSIZEOF_UNSIGNED_INT=__SIZEOF_INT__ -DSIZEOF_UNSIGNED_LONG=__SIZEOF_LONG__ | ||
29 | -- cpp-options: -DSIZEOF_UNSIGNED_LONG_LONG=__SIZEOF_LONG_LONG__ | ||
30 | -- -DHAVE_CONFIG_H=1 | ||
31 | -- -include xdelta3/build2/config.h -Wall -Wshadow -fno-builtin -Wextra -Wsign-compare -Wformat=2 -Wno-format-nonliteral -Wno-unused-parameter -Wno-unused-function extra-libraries: xdelta3, lzma, stdc++, m | ||
32 | cxx-sources: haskell/xdelta3.cc | ||
33 | |||
34 | hs-source-dirs: haskell | ||
35 | build-depends: base >=4.10, bytestring, text | ||
36 | default-language: Haskell2010 | ||