summaryrefslogtreecommitdiff
path: root/haskell/XDelta.hsc
blob: c449b9d7a42b47ee0f81d0802d08f68fff12dffc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
{-# LANGUAGE BangPatterns               #-}
{-# LANGUAGE GADTs                      #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase                 #-}
{-# LANGUAGE PatternSynonyms            #-}
module XDelta where

import Control.Exception
import Control.Monad
import Data.Bits
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as B
import Data.Function
import Data.Monoid
import qualified Data.Text as T
import Data.Text.Encoding
import Data.Word
import Foreign.C.Types
import Foreign.C.String
import Foreign.ForeignPtr
import Foreign.Marshal.Alloc
import Foreign.Marshal.Utils
import Foreign.Ptr
import Foreign.Storable
import System.IO
import System.IO.Error
import System.IO.Unsafe
import Data.VCDIFF.Types

#ifndef SIZEOF_SIZE_T
#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__
#define SIZEOF_UNSIGNED_INT __SIZEOF_INT__
#define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__
#define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__
#define static_assert(...)
#endif
#include <xdelta3.h>


newtype Stream = Stream (ForeignPtr Stream)

-- | Settings for the secondary compressor.
data CompressorConfig = CompressorConfig
  { ngroups     :: Usize_t -- ^ Number of DJW Huffman groups.
  , sector_size :: Usize_t -- ^ Sector size.
  , inefficient :: Int     -- ^ If true, ignore efficiency check [avoid XD3_NOSECOND].
  }


matcher :: SMatchSelect -> StringMatcher
matcher select = unsafePerformIO $ do
    let config = (init_config (mempty :: Flags)) { winsize = 40, smatch_cfg = Right select }
    Stream fptr <- throwXD $ config_stream config
    m <- withForeignPtr fptr $ \stream -> do
            let smatcher = (#ptr xd3_stream, smatcher) stream
                nmptr = (#ptr xd3_smatcher, name) smatcher
            nm <- peekCString nmptr
            StringMatcher nm
                <$> (#peek xd3_smatcher, string_match) smatcher
                <*> (#peek xd3_smatcher, large_look) smatcher
                <*> (#peek xd3_smatcher, large_step) smatcher
                <*> (#peek xd3_smatcher, small_look) smatcher
                <*> (#peek xd3_smatcher, small_chain) smatcher
                <*> (#peek xd3_smatcher, small_lchain) smatcher
                <*> (#peek xd3_smatcher, max_lazy) smatcher
                <*> (#peek xd3_smatcher, long_enough) smatcher
    finalizeForeignPtr fptr
    return m

-- | Default configured value of stream->winsize.  If the program
-- supplies xd3_encode_input() with data smaller than winsize the
-- stream will automatically buffer the input, otherwise the input
-- buffer is used directly.
pattern XD3_DEFAULT_WINSIZE = #const XD3_DEFAULT_WINSIZE
-- 8 MiB


instance Storable Config where
    sizeOf _ = #const sizeof(xd3_config)
    alignment _ = 1
    poke p cfg = do
        (#poke xd3_config, winsize) p $ winsize cfg
        (#poke xd3_config, flags)   p $ flags cfg
    peek p = do
        winsize <- (#peek xd3_config, winsize) p
        flags <- (#peek xd3_config, flags) p
        return Config
            { winsize = winsize
            , flags   = flags
            }


type CGetBlk = Ptr Stream -> Ptr Xd3_source -> Xoff_t -> IO CInt

foreign import ccall "wrapper"
    wrapGetBlk :: CGetBlk -> IO (FunPtr CGetBlk)

foreign import ccall "wrapper"
    wrapFinalizer :: (Ptr Stream -> IO ()) -> IO (FunPtr (Ptr Stream -> IO ()))

-- | For convenience, zero & initialize the xd3_config structure with specified
-- flags.
init_config :: Flags -> Config
init_config flags = Config
    { winsize = 0
    , flags   = flags
    }

foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Stream -> Ptr xd3_config -> IO ErrorCode

-- | xd3_free_stream frees all memory allocated for the stream.  The
-- application is responsible for freeing any of the resources it
-- supplied.
foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Stream -> IO ()


-- The xd3_config structure is used to initialize a stream - all data
-- is copied into stream so config may be a temporary variable.  See
-- the [documentation] or comments on the xd3_config structure.
config_stream :: Config -> IO (Either ErrorCode Stream)
config_stream cfg = do
    pstream <- callocBytes (#const sizeof(xd3_stream))
    fptr <- newForeignPtr finalizerFree pstream
    wrapFinalizer xd3_free_stream >>= (`addForeignPtrFinalizer` fptr)
    wrapFinalizer unset_source >>= (`addForeignPtrFinalizer` fptr)
    wrapFinalizer unset_header >>= (`addForeignPtrFinalizer` fptr)
    with cfg $ \pcfg -> do
        code <- xd3_config_stream pstream pcfg
        case code of
            ErrorCode 0 -> return . Right $ Stream fptr
            _ -> do free pstream
                    return . Left $ code

throwXD :: IO (Either ErrorCode a) -> IO a
throwXD action = action >>= either throwIO return

example_setup :: IO Stream
example_setup = do
    let config = (init_config (mempty :: Flags)) { winsize = 32768 }
    stream <- throwXD $ config_stream config
    return stream


newtype Xd3_source = Xd3_source (Ptr Xd3_source)

data CurrentBlock = CurrentBlock
    { blkno   :: !Xoff_t      -- ^ current block number
    , blkSize :: !Usize_t     -- ^ number of bytes on current block: must be >= 0 and <= 'srcBlockSize'
    , blkPtr  :: !(Ptr Word8) -- ^ current block array
    }

-- XD3_TOOFARBACK block is too old
-- XD3_INVALID_INPUT
type GetBlock = CurrentBlock -> Xoff_t -> IO (Either ErrorCode CurrentBlock)

data Source = Source
    { srcName       :: String  -- ^ name for debug/print purposes
    , srcBlockSize  :: Usize_t -- ^ block size
    , srcMaxWinSize :: Xoff_t  -- ^ maximum visible buffer
    , srcGetBlock   :: Maybe GetBlock
    }

foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Stream -> Ptr Xd3_source -> IO ErrorCode

requestedBlockNumber :: Stream -> IO Xoff_t
requestedBlockNumber (Stream fptr) = withForeignPtr fptr $ \stream -> do
    psrc <- (#peek xd3_stream, src) stream
    (#peek xd3_source, getblkno) psrc

peekCurrentBlock :: Stream -> IO CurrentBlock
peekCurrentBlock (Stream fptr) =  withForeignPtr fptr $ \stream -> do
    psrc <- (#peek xd3_stream, src) stream
    CurrentBlock <$> (#peek xd3_source, curblkno) psrc
                 <*> (#peek xd3_source, onblk) psrc
                 <*> (#peek xd3_source, curblk) psrc

pokeCurrentBlock :: Stream -> CurrentBlock -> IO ()
pokeCurrentBlock (Stream fptr) (CurrentBlock no sz ptr) = withForeignPtr fptr $ \stream -> do
    psrc <- (#peek xd3_stream, src) stream
    (#poke xd3_source, curblkno) psrc no
    (#poke xd3_source, onblk   ) psrc sz
    (#poke xd3_source, curblk  ) psrc ptr

nextOut :: Stream -> ((Ptr Word8, Int) -> IO a) -> IO a
nextOut (Stream fptr) action = withForeignPtr fptr $ \stream -> do
    buf <- (,) <$> (#peek xd3_stream, next_out) stream
               <*> (#peek xd3_stream, avail_out) stream
    a <- action buf
    xd3_consume_output stream
    return a

unset_source :: Ptr Stream -> IO ()
unset_source stream = do
    previous_src <- (#peek xd3_stream, src) stream
    when (previous_src /= nullPtr) $ free previous_src
    (#poke xd3_stream, src) stream nullPtr

unset_header :: Ptr Stream -> IO ()
unset_header stream = do
    hdr <- (#peek xd3_stream, enc_appheader) stream
    when (hdr /= nullPtr) $ free hdr
    (#poke xd3_stream, enc_appheader) stream nullPtr


-- | This function informs the encoder or decoder that source matching
-- (i.e., delta-compression) is possible.  For encoding, this should
-- be called before the first xd3_encode_input.  A NULL source is
-- ignored.  For decoding, this should be called before the first
-- window is decoded, but the appheader may be read first
-- (XD3_GOTHEADER).  After decoding the header, call xd3_set_source()
-- if you have a source file.  Note: if (stream->dec_win_ind & VCD_SOURCE)
-- is true, it means the first window expects there to be a source file.
set_source :: Stream -> Source -> IO ErrorCode
set_source (Stream fptr) src = withForeignPtr fptr $ \stream -> do
    unset_source stream
    let bsname = encodeUtf8 $ T.pack $ srcName src
    psrc <- callocBytes $ 1 + B.length bsname + (#const sizeof(xd3_source))
    let pname = castPtr psrc `plusPtr` (#const sizeof(xd3_source))
        copyname ptr (w:ws) = poke ptr w >> copyname (plusPtr ptr 1) ws
        copyname ptr []     = poke ptr (0 :: Word8)
    copyname pname (B.unpack bsname)
    (#poke xd3_source, blksize    ) psrc $ srcBlockSize src
    (#poke xd3_source, name       ) psrc pname
    (#poke xd3_source, max_winsize) psrc $ srcMaxWinSize src
    (#poke xd3_source, curblkno   ) psrc (maxBound :: Xoff_t)
    srcGetBlock src `forM_` \getBlock -> do
        cgetblk <- wrapGetBlk $ \stream psrc xoff -> do
            curblk <- CurrentBlock
                      <$> (#peek xd3_source, curblkno) psrc
                      <*> (#peek xd3_source, onblk) psrc
                      <*> (#peek xd3_source, curblk) psrc
            ret <- getBlock curblk xoff
            case ret of
                Left (ErrorCode ecode) -> return ecode
                Right (CurrentBlock no sz ptr) -> do
                    (#poke xd3_source, curblkno) psrc no
                    (#poke xd3_source, onblk   ) psrc sz
                    (#poke xd3_source, curblk  ) psrc ptr
                    return 0
        (#poke xd3_stream, getblk) stream cgetblk -- Warning: xdelta3.h documents this as an internal field.
                                                  -- It's possible to comply with the documentation using the
                                                  -- 'ioh' field to dispatch, but that would be awkward and
                                                  -- inefficient.
    xd3_set_source stream psrc


-- | Checks for legal flag changes.
foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Stream -> Flags -> IO ()

foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Stream -> Ptr a -> Usize_t -> IO ()
foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input  :: Ptr Stream -> IO ErrorCode
foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input  :: Ptr Stream -> IO ErrorCode

set_flags (Stream fp) f = withForeignPtr fp $ (`xd3_set_flags` f)
avail_input (Stream fp) p sz = withForeignPtr fp (\stream -> xd3_avail_input stream p sz)
decode_input (Stream fp) = withForeignPtr fp xd3_decode_input
encode_input (Stream fp) = withForeignPtr fp xd3_encode_input

-- xd3_get_appheader may be called in the decoder after XD3_GOTHEADER.
-- For convenience, the decoder always adds a single byte padding to
-- the end of the application header, which is set to zero in case the
-- application header is a string.
foreign import ccall "xdelta3.h xd3_get_appheader" xd3_get_appheader :: Ptr Stream -> Ptr (Ptr Word8) -> Ptr Usize_t -> IO ErrorCode

foreign import ccall "xdelta3.h xd3_set_appheader" xd3_set_appheader :: Ptr Stream -> Ptr Word8 -> Usize_t -> IO ()

-- output//source/
appHeader :: Stream -> IO (Maybe String)
appHeader (Stream fptr) = withForeignPtr fptr $ \stream ->
                                       alloca $ \pp ->
                                       alloca $ \psz -> do
    xd3_get_appheader stream pp psz >>= \case
        XD3_SUCCESS -> peek pp >>= \case
                          hdr | hdr == nullPtr -> return Nothing
                              | otherwise      -> do sz <- peek psz
                                                     Just <$> peekCStringLen (castPtr pp,fromIntegral sz)
        _           -> return Nothing


-- | This should be called before the first call to xd3_encode_input() to
-- include application-specific data in the VCDIFF header.
--
-- Note: This is used for encoding and is not the same field retrieved by
-- 'appHeader'.
setAppHeader :: Stream -> String -> IO ()
setAppHeader (Stream fptr) hdr = withForeignPtr fptr $ \stream -> do
    withCStringLen hdr $ \(dta,sz) -> do
        phdr <- mallocBytes sz
        copyBytes phdr (castPtr dta) sz
        xd3_set_appheader stream phdr (fromIntegral sz)


sourceFromHandle :: Usize_t -> Ptr Word8 -> Handle -> Source
sourceFromHandle blksize ptr h = Source
    { srcName       = "sourceFromHandle"
    , srcBlockSize  = blksize
    , srcMaxWinSize = blksize
    , srcGetBlock   = Just $ \_ num -> do
            hSeek h AbsoluteSeek (fromIntegral blksize * fromIntegral num)
            cnt <- hGetBuf h ptr (fromIntegral blksize)
            return $ Right (CurrentBlock num (fromIntegral cnt) ptr)
        `catchIOError` \_ -> return $ Left XD3_TOOFARBACK
    }

example_set_source stream = do
    h <- openFile "source-file.bin" ReadMode
    buf <- mallocBytes 32768
    let source = (sourceFromHandle 32768 buf h) { srcName = "source-file.bin" }
    ret <- set_source stream source
    return ret

withFileSource :: Config -> FilePath -> Int -> (Stream -> IO a) -> IO a
withFileSource cfg fname blksize action = do
    stream <- throwXD $ config_stream cfg
    withFile fname ReadMode $ \h -> do
        allocaBytes blksize $ \buf -> do
            code <- set_source stream (sourceFromHandle (fromIntegral blksize) buf h) { srcName = fname }
            case code of
                XD3_SUCCESS -> action stream
                _           -> throwIO code


example_input_loop stream inp = do
    allocaBytes 4096 $ \indata -> fix $ \loop -> do
        insize <- hGetBuf inp indata 4096
        let reached_EOF = insize < 4096
        when reached_EOF $ set_flags stream XD3_FLUSH
        avail_input stream indata (fromIntegral insize)
        fix $ \process -> do
            ret <- decode_input stream
            case ret of
                XD3_INPUT -> when (not reached_EOF) loop
                XD3_OUTPUT -> do
                    -- todo write data
                    process
                XD3_GETSRCBLK -> do
                    -- todo set source block
                    process
                XD3_GOTHEADER -> process -- No
                XD3_WINSTART  -> process --  action
                XD3_WINFINISH -> process --   neccessary
                _ -> throwIO (userError "Unexpected return code from decode_input.")

{-
data BufferEater m where
    Lazy :: (ByteString -> m -> m) -> BufferEater m
    Strict :: (ByteString -> IO ()) -> BufferEater ()
    -}

data XDeltaMethods m = XDeltaMethods
    { xConfig    :: Config
    , xGetSource :: Xoff_t -> B.ByteString
    , xOutput    :: Ptr Word8 -> Int -> IO m
    , xOnError   :: ErrorCode -> String -> IO m
    , xBlockSize :: Usize_t
    }

-- | This acknowledges receipt of output data, must be called after any
-- XD3_OUTPUT return.
foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Stream -> IO ()

-- -- built in to 'nextOut'
-- acknowledgeOutput :: Stream -> IO ()
-- acknowledgeOutput (Stream fptr) = withForeignPtr fptr $ xd3_consume_output

foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Stream -> IO CString

-- | Gives some extra information about the latest library error, if any
-- is known.
errorString (Stream fptr) = withForeignPtr fptr $ \stream -> do
    cstring <- xd3_errstring stream
    peekCString cstring


xdelta :: Monoid m => XDeltaMethods m -> (Stream -> IO ErrorCode) -> [B.ByteString] -> IO m
xdelta x xxcode_input ds = do
    stream <- throwXD $ config_stream (xConfig x)
    set_source stream Source
            { srcName       = "XDeltaMethods"
            , srcBlockSize  = xBlockSize x
            , srcMaxWinSize = xBlockSize x
            , srcGetBlock   = Nothing
            }
    let go withBlk (d:ds) = do
            let (fp,off,len) = B.toForeignPtr d
            withForeignPtr fp $ \indata0 -> do
                let indata = indata0 `plusPtr` off
                    eof = null ds
                when eof $ set_flags stream XD3_FLUSH
                avail_input stream indata (fromIntegral len)
                go2 withBlk eof ds
        go2 withBlk eof ds = do
            ret <- withBlk $ xxcode_input stream
            case ret of
                XD3_INPUT -> if (not eof) then go withBlk ds else return mempty
                XD3_OUTPUT -> do
                    m' <- nextOut stream (uncurry $ xOutput x)
                    ms <- unsafeInterleaveIO $ go2 withBlk eof ds
                    return $ m' <> ms
                XD3_GETSRCBLK -> do
                    n <- requestedBlockNumber stream
                    let blk = xGetSource x n
                        withBlk' act = let (fp,off,len) = B.toForeignPtr blk
                                       in withForeignPtr fp $ \p -> do
                                           pokeCurrentBlock stream $ CurrentBlock n (fromIntegral len) (plusPtr p off)
                                           act
                    go2 withBlk' eof ds
                XD3_GOTHEADER -> go2 withBlk eof ds -- No
                XD3_WINSTART  -> go2 withBlk eof ds --  action
                XD3_WINFINISH -> go2 withBlk eof ds --   neccessary
                            -- -- These are set for each XD3_WINFINISH return.
                            -- xd3_encoder_used_source :: Ptr Stream -> IO Bool
                            -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t
                            -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t
                e -> do
                    s <- errorString stream
                    xOnError x e s
    go id ds