1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
|
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NondecreasingIndentation #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
module Data.VCDIFF where
import Control.Monad
import Control.Monad.Primitive
import Control.Monad.ST
import Control.Monad.ST.Unsafe
import Data.Bits
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Lazy as L
import Data.Coerce
import Data.Int
import qualified Data.IntMap as IntMap
import Data.Monoid
import Data.Primitive.Addr
import Data.Primitive.ByteArray
import Data.Primitive.ByteArray.Util
import Data.Primitive.MutVar
import Data.STRef
import qualified Data.Text as T
import Data.Text.Encoding
import Data.Word
import Foreign.C.Types
import Foreign.C.String
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Ptr
import Foreign.Concurrent
import Foreign.Storable
import Foreign.ForeignPtr (ForeignPtr)
import GHC.Exts
import GHC.TypeLits
import Data.VCDIFF.Types
import Data.VCDIFF.XDelta
data Stream m = Stream
{ streamArray :: StreamArray m
, streamPtr :: ForeignPtr Xd3Stream -- ^ HACK: this is only used to attach a finalizer
-- to 'streamArray'. Don't use this pointer.
-- This would be unnecessary if I could create a
-- MutableByteArray with a finalizer attached.
, streamSource :: MutVar (PrimState m) (Maybe (Source m))
}
-- The xd3_config structure is used to initialize a stream - all data
-- is copied into stream so config may be a temporary variable. See
-- the [documentation] or comments on the xd3_config structure.
config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m))
config_stream cfg = do
(s,nptr) <- newStreamArray (either (Just . smName) (const Nothing) (smatch_cfg cfg))
c <- packConfig nptr cfg
let cptr = ptr (byteArrayContents c) :: Ptr Config
sptr = streamArrayPtr s
srcvar <- newMutVar Nothing
stream <- unsafeIOToPrim $ do
let finalize = do
-- freeHaskellFunPtr: aloc,free,getblk
xd3_abort_stream sptr
xd3_close_stream sptr
xd3_free_stream sptr
seq srcvar $ seq s $ return () -- Keep array s alive until the ffi functions finish.
fp <- newForeignPtr sptr finalize
return Stream
{ streamArray = s
, streamPtr = fp
, streamSource = srcvar
}
unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case
XD3_SUCCESS -> return $ c `seq` Right stream
ecode -> return $ Left ecode
set_source :: PrimMonad m =>
Stream m -> String -- ^ name for debug/print purposes.
-> Usize_t -- ^ block size
-> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size).
-- Rounds up to approx 16k.
-> m ()
set_source stream nm blksz maxwinsz = do
src <- newSource nm blksz maxwinsz
{-
writeAtByte (streamArray stream)
#{offset xd3_stream, getblk}
nullPtr -- xdelta3.h documents this as an internal field.
-}
let strm = streamArrayPtr $ streamArray stream
unsafeIOToPrim (xd3_set_source strm $ sourcePtr src)
writeMutVar (streamSource stream) (Just src)
data XDeltaMethods m u = XDeltaMethods
{ xConfig :: Config
, xGetSource :: Xoff_t -> B.ByteString
, xOutput :: Ptr Word8 -> Int -> m u
, xOnError :: ErrorCode -> String -> m u
, xBlockSize :: Usize_t
, xInterleave :: forall a. m a -> m a
}
-- -- | Checks for legal flag changes.
-- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO ()
-- -- declared static
-- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO ()
-- | This acknowledges receipt of output data, must be called after any
-- XD3_OUTPUT return.
-- -- declared static
-- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO ()
requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t)
requestedBlockNumber stream = do
msrc <- readMutVar $ streamSource stream
forM msrc sourceRequestedBlocknumber
-- -- declared static
-- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString
pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m ()
pokeCurrentBlock stream blk = do
msrc <- readMutVar $ streamSource stream
forM_ msrc (`sourceWriteCurrentBlock` blk)
withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a
withByteString d act =
let (fp,off,len) = B.toForeignPtr d
in unsafeIOToPrim $ withForeignPtr fp $ \ptr -> unsafePrimToIO $ do
act (ptr `plusPtr` off) (fromIntegral len)
xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u
xdelta x xxcode_input ds = do
mstream <- config_stream (xConfig x)
either (\e _ -> xOnError x e "config_stream failed")
(flip ($))
mstream $ \stream -> do
set_source stream "VCDIFF" (xBlockSize x) (xBlockSize x)
let go withBlk [] = return mempty
go withBlk (d:ds) = do
let (fp,off,len) = B.toForeignPtr d
eof = null ds
when eof $ setFlush (streamArray stream) True
withByteString d $ \indata len -> do
avail_input (streamArray stream) indata len
go2 withBlk eof ds
go2 withBlk eof ds = do
ret <- withBlk $ xxcode_input stream
case ret of
XD3_INPUT -> if (not eof) then go withBlk ds else return mempty
XD3_OUTPUT -> do
m' <- nextOut (streamArray stream) (\(p,len) -> xOutput x p (fromIntegral len))
ms <- xInterleave x $ go2 withBlk eof ds
return $ m' <> ms
XD3_GETSRCBLK -> do
Just n <- requestedBlockNumber stream
let blk = xGetSource x n
withBlk' act = withByteString blk $ \p len -> do
pokeCurrentBlock stream $ CurrentBlock n len p
when (len < xBlockSize x) $ do
Just src <- readMutVar $ streamSource stream
sourceWriteEOFKnown src True
act
go2 withBlk' eof ds
XD3_GOTHEADER -> go2 withBlk eof ds -- No
XD3_WINSTART -> go2 withBlk eof ds -- action
XD3_WINFINISH -> go2 withBlk eof ds -- neccessary
-- -- These are set for each XD3_WINFINISH return.
-- xd3_encoder_used_source :: Ptr Stream -> IO Bool
-- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t
-- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t
e -> do
s <- errorString (streamArray stream)
xOnError x e s
xInterleave x $ go id ds
decode_input :: PrimMonad m => Stream m -> m ErrorCode
decode_input stream =
unsafeIOToPrim $ xd3_decode_input (streamArrayPtr $ streamArray stream)
encode_input :: PrimMonad m => Stream m -> m ErrorCode
encode_input stream =
unsafeIOToPrim $ xd3_encode_input (streamArrayPtr $ streamArray stream)
-- RFC 3284
newtype VCDIFF = VCDIFF L.ByteString
deriving Show
chunksOf :: Usize_t -> L.ByteString -> [B.ByteString]
chunksOf len bs | L.null bs = []
| otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs
in L.toStrict b : chunksOf len bs'
computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF
computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched
applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString
applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta
data Result x = Result
{ result :: x -- ^ A possibly invalid result. To consume a lazy stream with fusion, avoid
-- evaluating 'resultError' until this field is fully processed.
, resultError :: Maybe (ErrorCode,String)
-- ^ If something went wrong while producing 'result', this
-- is an error code and message indicating what.
} deriving (Show,Functor)
instance Monoid x => Monoid (Result x) where
mempty = Result mempty Nothing
mappend (Result x xe) y = Result (mappend x $ result y) (maybe (resultError y) Just xe)
xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> Result L.ByteString
xdeltaPure codec cfg source input =
let bsize = chunk_size cfg
ds = chunksOf bsize input
smap = IntMap.fromList $ zip [0..] (chunksOf bsize source)
x :: XDeltaMethods (ST s) (Result L.ByteString)
x = XDeltaMethods
{ xConfig = cfg
, xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of
Nothing -> B.empty
Just bs -> bs
, xOutput = \ptr len -> unsafeIOToST $ flip Result Nothing . L.fromStrict
<$> B.packCStringLen (castPtr ptr,len)
, xOnError = \e s -> return (Result L.empty (Just (e,s)))
, xBlockSize = bsize
, xInterleave = unsafeInterleaveST
}
in runST $ xdelta x codec ds
defaultConfig :: Config
defaultConfig = Config
{ winsize = XD3_DEFAULT_WINSIZE
, sprevsz = XD3_DEFAULT_SPREVSZ
, iopt_size = XD3_DEFAULT_IOPT_SIZE
, flags = mempty
, sec_data = CompressorConfig 0 0 0
, sec_inst = CompressorConfig 0 0 0
, sec_addr = CompressorConfig 0 0 0
, smatch_cfg = Right SMATCH_DEFAULT
, chunk_size = 4096
}
|