1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
|
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NondecreasingIndentation #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveFunctor #-}
module Data.XDelta where
import Control.Monad
import Control.Monad.Primitive
import Control.Monad.ST
import Control.Monad.ST.Unsafe
import Data.BA
import Data.Bits
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Lazy as L
import Data.Coerce
import Data.Int
import qualified Data.IntMap as IntMap
import Data.Monoid
import Data.Primitive.Addr
import Data.Primitive.ByteArray
import Data.Primitive.MutVar
import Data.STRef
import qualified Data.Text as T
import Data.Text.Encoding
import Data.Word
import Debug.Trace
import Foreign.C.Types
import Foreign.C.String
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Ptr
import Foreign.Concurrent
import Foreign.Storable
import Foreign.ForeignPtr (ForeignPtr)
import GHC.Exts
import XDelta.Types
#ifndef SIZEOF_SIZE_T
#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__
#define SIZEOF_UNSIGNED_INT __SIZEOF_INT__
#define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__
#define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__
#define static_assert(...)
#endif
#include <xdelta3.h>
data Stream m = Stream
{ streamArray :: MutableByteArray (PrimState m)
, streamPtr :: ForeignPtr Xd3Stream
, streamSource :: MutVar (PrimState m) (Maybe (MutableByteArray (PrimState m)))
}
foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Xd3Stream -> Ptr Config -> IO ErrorCode
foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Xd3Stream -> IO ()
foreign import ccall "xdelta3.h xd3_abort_stream" xd3_abort_stream :: Ptr Xd3Stream -> IO ()
foreign import ccall "xdelta3.h xd3_close_stream" xd3_close_stream :: Ptr Xd3Stream -> IO ErrorCode
writeCompressorConfig :: PrimMonad m =>
MutableByteArray (PrimState m) -> Int -> CompressorConfig -> m ()
writeCompressorConfig c o sec = do
writeAtByte c (o + #{offset xd3_sec_cfg,ngroups}) (ngroups sec)
writeAtByte c (o + #{offset xd3_sec_cfg,sector_size}) (sector_size sec)
writeAtByte c (o + #{offset xd3_sec_cfg,inefficient}) (inefficient sec)
writeMatcher :: PrimMonad m =>
MutableByteArray (PrimState m) -> Int -> StringMatcher -> m ()
writeMatcher c o sm = do
-- handled elsewhere: const char *name; <- smName :: String
writeAtByte c (o + #{offset xd3_smatcher, string_match }) (smStringMatch sm)
writeAtByte c (o + #{offset xd3_smatcher, large_look }) (smLargeLook sm)
writeAtByte c (o + #{offset xd3_smatcher, large_step }) (smLargeStep sm)
writeAtByte c (o + #{offset xd3_smatcher, small_look }) (smSmallLook sm)
writeAtByte c (o + #{offset xd3_smatcher, small_chain }) (smSmallChain sm)
writeAtByte c (o + #{offset xd3_smatcher, small_lchain }) (smSmallLchain sm)
writeAtByte c (o + #{offset xd3_smatcher, max_lazy }) (smMaxLazy sm)
writeAtByte c (o + #{offset xd3_smatcher, long_enough }) (smLongEnough sm)
ptr :: Addr -> Ptr a
ptr (Addr a) = Ptr a
adr :: Ptr a -> Addr
adr (Ptr a) = Addr a
-- The xd3_config structure is used to initialize a stream - all data
-- is copied into stream so config may be a temporary variable. See
-- the [documentation] or comments on the xd3_config structure.
config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m))
config_stream cfg = do
let (len,n) = case smatch_cfg cfg of
Left m -> let n = encodeUtf8 $ T.pack $ smName m
in ( #{const sizeof(xd3_stream)} + B.length n + 1
, n )
Right _ -> ( #{const sizeof(xd3_stream)}, B.empty )
s <- newPinnedByteArray len
let sptr = ptr (mutableByteArrayContents s) :: Ptr Xd3Stream
fillByteArray s 0 #{const sizeof(xd3_stream)} 0
nptr <- case smatch_cfg cfg of
Right _ -> writeStringAt s #{const sizeof(xd3_stream)} n
Left _ -> return nullPtr
c <- do
c <- newPinnedByteArray #const sizeof(xd3_config)
fillByteArray c 0 #{const sizeof(xd3_config)} 0
writeAtByte c #{offset xd3_config, winsize} (winsize cfg)
writeAtByte c #{offset xd3_config, sprevsz} (sprevsz cfg)
writeAtByte c #{offset xd3_config, iopt_size} (iopt_size cfg)
writeAtByte c #{offset xd3_config, flags} (coerce (flags cfg) :: Word32)
writeCompressorConfig c #{offset xd3_config, sec_data} (sec_data cfg)
writeCompressorConfig c #{offset xd3_config, sec_inst} (sec_inst cfg)
writeCompressorConfig c #{offset xd3_config, sec_addr} (sec_addr cfg)
let msel :: #type xd3_smatch_cfg
msel = either (const #{const XD3_SMATCH_SOFT})
(fromIntegral . fromEnum)
(smatch_cfg cfg)
writeAtByte c #{offset xd3_config, smatch_cfg} msel
case smatch_cfg cfg of
Right _ -> return ()
Left matcher -> do
let o = #offset xd3_config,smatcher_soft
writeAtByte c (o + #{offset xd3_smatcher, name}) nptr
writeMatcher c o matcher
unsafeFreezeByteArray c
let cptr = ptr (byteArrayContents c) :: Ptr Config
srcvar <- newMutVar Nothing
stream <- unsafeIOToPrim $ do
let finalize = do
-- freeHaskellFunPtr: aloc,free,getblk
xd3_abort_stream sptr
xd3_close_stream sptr
xd3_free_stream sptr
fp <- newForeignPtr sptr finalize
return Stream
{ streamArray = s
, streamPtr = fp
, streamSource = srcvar
}
unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case
XD3_SUCCESS -> return $ c `seq` Right stream
ecode -> return $ Left ecode
writeStringAt :: PrimMonad m => MutableByteArray s -> Int -> B.ByteString -> m (Ptr a)
writeStringAt src o bsname = do
(p,cnt) <- unsafeIOToPrim $ B.unsafeUseAsCStringLen bsname return
let nptr = ptr (mutableByteArrayContents src) `plusPtr` o
copyAddr (adr nptr) (adr p) cnt
writeOffAddr (adr nptr) cnt (0 :: Word8)
return nptr
data Xd3Source
foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Xd3Stream -> Ptr Xd3Source -> IO ErrorCode
set_source :: PrimMonad m =>
Stream m -> String -- ^ name for debug/print purposes.
-> Usize_t -- ^ block size
-> Xoff_t -- ^ maximum visible buffer
-> m ()
set_source stream nm blksz maxwinsz = do
let bsname = encodeUtf8 $ T.pack nm
src <- newPinnedByteArray $ 1 + B.length bsname + #{const sizeof(xd3_source)}
nptr <- writeStringAt src #{const sizeof(xd3_source)} bsname
writeAtByte src #{offset xd3_source, blksize } blksz
writeAtByte src #{offset xd3_source, name } nptr
writeAtByte src #{offset xd3_source, max_winsize} maxwinsz
writeAtByte src #{offset xd3_source, curblkno } (maxBound :: Xoff_t)
{-
writeAtByte (streamArray stream)
#{offset xd3_stream, getblk}
nullPtr -- xdelta3.h documents this as an internal field.
-}
let strm = ptr (mutableByteArrayContents $ streamArray stream)
srcptr = ptr (mutableByteArrayContents src)
unsafeIOToPrim (xd3_set_source strm srcptr)
writeMutVar (streamSource stream) (Just src)
data XDeltaMethods m u = XDeltaMethods
{ xConfig :: Config
, xGetSource :: Xoff_t -> B.ByteString
, xOutput :: Ptr Word8 -> Int -> m u
, xOnError :: ErrorCode -> String -> m u
, xBlockSize :: Usize_t
, xInterleave :: forall a. m a -> m a
}
-- -- | Checks for legal flag changes.
-- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO ()
setFlag :: PrimMonad m => Flags -> Stream m -> Bool -> m ()
setFlag b stream wantFlush = do
f <- readAtByte (streamArray stream) #{offset xd3_stream, flags}
writeAtByte (streamArray stream) #{offset xd3_stream, flags}
. (coerce :: Flags -> Word32)
$ if wantFlush then Flags f .|. b
else Flags f .&. complement b
setFlush :: PrimMonad m => Stream m -> Bool -> m ()
setFlush = setFlag XD3_FLUSH
setSkipWindow :: PrimMonad m => Stream m -> Bool -> m ()
setSkipWindow = setFlag XD3_SKIP_WINDOW
-- -- declared static
-- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO ()
avail_input :: PrimMonad m => Stream m -> Ptr a -> Usize_t -> m ()
avail_input stream p sz = do
writeAtByte (streamArray stream) #{offset xd3_stream, next_in} p
writeAtByte (streamArray stream) #{offset xd3_stream, avail_in} sz
-- | This acknowledges receipt of output data, must be called after any
-- XD3_OUTPUT return.
-- -- declared static
-- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO ()
nextOut :: PrimMonad m => Stream m -> ((Ptr Word8, Usize_t) -> m a) -> m a
nextOut stream action = do
buf <- (,)
<$> readAtByte (streamArray stream) #{offset xd3_stream, next_out}
<*> readAtByte (streamArray stream) #{offset xd3_stream, avail_out}
a <- action buf
-- unsafeIOToPrim $ xd3_consume_output (ptr $ mutableByteArrayContents $ streamArray stream)
writeAtByte (streamArray stream) #{offset xd3_stream, avail_out} (0 :: Usize_t)
return a
requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t)
requestedBlockNumber stream = do
msrc <- readMutVar $ streamSource stream
forM msrc $ \src -> readAtByte src #offset xd3_source, getblkno
data CurrentBlock = CurrentBlock
{ blkno :: !Xoff_t -- ^ current block number
, blkSize :: !Usize_t -- ^ number of bytes on current block: must be >= 0 and <= 'srcBlockSize'
, blkPtr :: !(Ptr Word8) -- ^ current block array
}
-- -- declared static
-- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString
-- | Gives some extra information about the latest library error, if any
-- is known.
errorString :: PrimMonad m => Stream m -> m String
errorString stream = do
cstring <- readAtByte (streamArray stream) #offset xd3_stream, msg
if cstring /= nullPtr
then unsafeIOToPrim $ peekCString cstring -- TODO: avoid unsafeIOToPrim
else return ""
pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m ()
pokeCurrentBlock stream (CurrentBlock no sz ptr) = do
msrc <- readMutVar $ streamSource stream
forM_ msrc $ \src -> do
writeAtByte src #{offset xd3_source, curblkno} no
writeAtByte src #{offset xd3_source, onblk} sz
writeAtByte src #{offset xd3_source, curblk} ptr
withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a
withByteString d act =
let (fp,off,len) = B.toForeignPtr d
in unsafeIOToPrim $ withForeignPtr fp $ \ptr -> unsafePrimToIO $ do
act (ptr `plusPtr` off) (fromIntegral len)
xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u
xdelta x xxcode_input ds = do
mstream <- config_stream (xConfig x)
either (\e _ -> xOnError x e "config_stream failed")
(flip ($))
mstream $ \stream -> do
set_source stream "xdelta" (xBlockSize x) (xBlockSize x)
let go withBlk (d:ds) = do
let (fp,off,len) = B.toForeignPtr d
eof = null ds
when eof $ setFlush stream True
withByteString d $ \indata len -> do
avail_input stream indata len
go2 withBlk eof ds
go2 withBlk eof ds = do
ret <- withBlk $ xxcode_input stream
case ret of
XD3_INPUT -> if (not eof) then go withBlk ds else return mempty
XD3_OUTPUT -> do
m' <- nextOut stream (\(p,len) -> xOutput x p (fromIntegral len))
ms <- xInterleave x $ undefined -- go2 withBlk eof ds
return $ trace "chunk" m' <> ms
-- XXX: This output is to test for laziness.
XD3_GETSRCBLK -> do
Just n <- requestedBlockNumber stream
let blk = xGetSource x n
withBlk' act = withByteString blk $ \p len -> do
pokeCurrentBlock stream $ CurrentBlock n len p
when (len < xBlockSize x) $ do
Just src <- readMutVar $ streamSource stream
writeAtByte src #{offset xd3_source, eof_known} (1 :: #{type int})
act
go2 withBlk' eof ds
XD3_GOTHEADER -> go2 withBlk eof ds -- No
XD3_WINSTART -> go2 withBlk eof ds -- action
XD3_WINFINISH -> go2 withBlk eof ds -- neccessary
-- -- These are set for each XD3_WINFINISH return.
-- xd3_encoder_used_source :: Ptr Stream -> IO Bool
-- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t
-- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t
e -> do
s <- errorString stream
xOnError x e s
go id ds
foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input :: Ptr Xd3Stream -> IO ErrorCode
foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input :: Ptr Xd3Stream -> IO ErrorCode
decode_input :: PrimMonad m => Stream m -> m ErrorCode
decode_input stream =
unsafeIOToPrim $ xd3_decode_input (ptr $ mutableByteArrayContents $ streamArray stream)
encode_input :: PrimMonad m => Stream m -> m ErrorCode
encode_input stream =
unsafeIOToPrim $ xd3_encode_input (ptr $ mutableByteArrayContents $ streamArray stream)
newtype XDelta = XDelta L.ByteString
deriving Show
chunksOf :: Usize_t -> L.ByteString -> [B.ByteString]
chunksOf len bs | L.null bs = []
| otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs
in L.toStrict b : chunksOf len bs'
computeDiff :: Config -> L.ByteString -> L.ByteString -> XDeltaFailable XDelta
computeDiff cfg source patched = fmap XDelta $ xdeltaPure encode_input cfg source patched
applyPatch :: Config -> L.ByteString -> XDelta -> XDeltaFailable L.ByteString
applyPatch cfg source (XDelta delta) = xdeltaPure decode_input cfg source delta
data XDeltaFailable x = XError ErrorCode String
| XSuccess x
deriving (Show,Functor)
instance Monoid x => Monoid (XDeltaFailable x) where
mempty = XSuccess mempty
mappend (XSuccess x) (XSuccess y) = XSuccess $ mappend x y
mappend x@XError{} _ = x
mappend _ y@XError{} = y
xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> XDeltaFailable L.ByteString
xdeltaPure codec cfg source delta =
let smap = IntMap.fromList $ zip [0..] (chunksOf 16 source)
x :: XDeltaMethods (ST s) (XDeltaFailable L.ByteString)
x = XDeltaMethods
{ xConfig = cfg
, xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of
Nothing -> B.empty
Just bs -> bs
, xOutput = \ptr len -> unsafeIOToST $ XSuccess . L.fromStrict <$> B.packCStringLen (castPtr ptr,len)
, xOnError = \e s -> return (XError e s) -- :: ErrorCode -> String -> m u
, xBlockSize = 16 -- 4096 -- :: Usize_t
, xInterleave = unsafeInterleaveST -- :: forall a. m a -> m a
-- XXX: Why isn't unsafeInterleaveST making it lazy?
}
ds = chunksOf 16 delta -- L.toChunks delta
in runST $ xdelta x codec ds
defaultConfig :: Config
defaultConfig = Config
{ winsize = 4096
, sprevsz = 0
, iopt_size = 0
, flags = mempty
, sec_data = CompressorConfig 0 0 0
, sec_inst = CompressorConfig 0 0 0
, sec_addr = CompressorConfig 0 0 0
, smatch_cfg = Right SMATCH_DEFAULT
}
|