summaryrefslogtreecommitdiff
path: root/haskell/Data/VCDIFF.hs
diff options
context:
space:
mode:
authorJoe Crayne <joe@jerkface.net>2018-10-27 16:26:10 -0400
committerJoe Crayne <joe@jerkface.net>2018-10-27 16:26:10 -0400
commit4aab5a236e578f3cd97566bc142027e06e955f73 (patch)
tree9aeba26a693c8d17dfc66d6d98b2209a0be6b8b6 /haskell/Data/VCDIFF.hs
parent2d01ddf9bffb7be441e2cf1c7071240148839ab5 (diff)
build fix
Diffstat (limited to 'haskell/Data/VCDIFF.hs')
-rw-r--r--haskell/Data/VCDIFF.hs255
1 files changed, 255 insertions, 0 deletions
diff --git a/haskell/Data/VCDIFF.hs b/haskell/Data/VCDIFF.hs
new file mode 100644
index 0000000..a776052
--- /dev/null
+++ b/haskell/Data/VCDIFF.hs
@@ -0,0 +1,255 @@
1{-# LANGUAGE BangPatterns #-}
2{-# LANGUAGE DataKinds #-}
3{-# LANGUAGE DeriveFunctor #-}
4{-# LANGUAGE FlexibleContexts #-}
5{-# LANGUAGE FlexibleInstances #-}
6{-# LANGUAGE GADTs #-}
7{-# LANGUAGE GeneralizedNewtypeDeriving #-}
8{-# LANGUAGE LambdaCase #-}
9{-# LANGUAGE NondecreasingIndentation #-}
10{-# LANGUAGE PatternSynonyms #-}
11{-# LANGUAGE RankNTypes #-}
12{-# LANGUAGE TypeFamilies #-}
13{-# LANGUAGE TypeOperators #-}
14module Data.VCDIFF where
15
16import Control.Monad
17import Control.Monad.Primitive
18import Control.Monad.ST
19import Control.Monad.ST.Unsafe
20import Data.Bits
21import qualified Data.ByteString as B
22import qualified Data.ByteString.Unsafe as B
23import qualified Data.ByteString.Internal as B
24import qualified Data.ByteString.Lazy as L
25import Data.Coerce
26import Data.Int
27import qualified Data.IntMap as IntMap
28import Data.Monoid
29import Data.Primitive.Addr
30import Data.Primitive.ByteArray
31import Data.Primitive.ByteArray.Util
32import Data.Primitive.MutVar
33import Data.STRef
34import qualified Data.Text as T
35import Data.Text.Encoding
36import Data.Word
37import Foreign.C.Types
38import Foreign.C.String
39import Foreign.ForeignPtr (withForeignPtr)
40import Foreign.Ptr
41import Foreign.Concurrent
42import Foreign.Storable
43import Foreign.ForeignPtr (ForeignPtr)
44import GHC.Exts
45import GHC.TypeLits
46
47import Data.VCDIFF.Types
48import Data.VCDIFF.XDelta
49
50data Stream m = Stream
51 { streamArray :: StreamArray m
52 , streamPtr :: ForeignPtr Xd3Stream -- ^ HACK: this is only used to attach a finalizer
53 -- to 'streamArray'. Don't use this pointer.
54 -- This would be unnecessary if I could create a
55 -- MutableByteArray with a finalizer attached.
56 , streamSource :: MutVar (PrimState m) (Maybe (Source m))
57 }
58
59
60-- The xd3_config structure is used to initialize a stream - all data
61-- is copied into stream so config may be a temporary variable. See
62-- the [documentation] or comments on the xd3_config structure.
63config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m))
64config_stream cfg = do
65 (s,nptr) <- newStreamArray (either (Just . smName) (const Nothing) (smatch_cfg cfg))
66 c <- packConfig nptr cfg
67 let cptr = ptr (byteArrayContents c) :: Ptr Config
68 sptr = streamArrayPtr s
69 srcvar <- newMutVar Nothing
70 stream <- unsafeIOToPrim $ do
71 let finalize = do
72 -- freeHaskellFunPtr: aloc,free,getblk
73 xd3_abort_stream sptr
74 xd3_close_stream sptr
75 xd3_free_stream sptr
76 seq srcvar $ seq s $ return () -- Keep array s alive until the ffi functions finish.
77 fp <- newForeignPtr sptr finalize
78 return Stream
79 { streamArray = s
80 , streamPtr = fp
81 , streamSource = srcvar
82 }
83 unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case
84 XD3_SUCCESS -> return $ c `seq` Right stream
85 ecode -> return $ Left ecode
86
87
88set_source :: PrimMonad m =>
89 Stream m -> String -- ^ name for debug/print purposes.
90 -> Usize_t -- ^ block size
91 -> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size).
92 -- Rounds up to approx 16k.
93 -> m ()
94set_source stream nm blksz maxwinsz = do
95 src <- newSource nm blksz maxwinsz
96 {-
97 writeAtByte (streamArray stream)
98 #{offset xd3_stream, getblk}
99 nullPtr -- xdelta3.h documents this as an internal field.
100 -}
101 let strm = streamArrayPtr $ streamArray stream
102 unsafeIOToPrim (xd3_set_source strm $ sourcePtr src)
103 writeMutVar (streamSource stream) (Just src)
104
105data XDeltaMethods m u = XDeltaMethods
106 { xConfig :: Config
107 , xGetSource :: Xoff_t -> B.ByteString
108 , xOutput :: Ptr Word8 -> Int -> m u
109 , xOnError :: ErrorCode -> String -> m u
110 , xBlockSize :: Usize_t
111 , xInterleave :: forall a. m a -> m a
112 }
113
114-- -- | Checks for legal flag changes.
115-- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO ()
116
117-- -- declared static
118-- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO ()
119
120-- | This acknowledges receipt of output data, must be called after any
121-- XD3_OUTPUT return.
122-- -- declared static
123-- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO ()
124
125requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t)
126requestedBlockNumber stream = do
127 msrc <- readMutVar $ streamSource stream
128 forM msrc sourceRequestedBlocknumber
129
130-- -- declared static
131-- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString
132
133pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m ()
134pokeCurrentBlock stream blk = do
135 msrc <- readMutVar $ streamSource stream
136 forM_ msrc (`sourceWriteCurrentBlock` blk)
137
138withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a
139withByteString d act =
140 let (fp,off,len) = B.toForeignPtr d
141 in unsafeIOToPrim $ withForeignPtr fp $ \ptr -> unsafePrimToIO $ do
142 act (ptr `plusPtr` off) (fromIntegral len)
143
144xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u
145xdelta x xxcode_input ds = do
146 mstream <- config_stream (xConfig x)
147 either (\e _ -> xOnError x e "config_stream failed")
148 (flip ($))
149 mstream $ \stream -> do
150 set_source stream "VCDIFF" (xBlockSize x) (xBlockSize x)
151 let go withBlk [] = return mempty
152 go withBlk (d:ds) = do
153 let (fp,off,len) = B.toForeignPtr d
154 eof = null ds
155 when eof $ setFlush (streamArray stream) True
156 withByteString d $ \indata len -> do
157 avail_input (streamArray stream) indata len
158 go2 withBlk eof ds
159 go2 withBlk eof ds = do
160 ret <- withBlk $ xxcode_input stream
161 case ret of
162 XD3_INPUT -> if (not eof) then go withBlk ds else return mempty
163 XD3_OUTPUT -> do
164 m' <- nextOut (streamArray stream) (\(p,len) -> xOutput x p (fromIntegral len))
165 ms <- xInterleave x $ go2 withBlk eof ds
166 return $ m' <> ms
167 XD3_GETSRCBLK -> do
168 Just n <- requestedBlockNumber stream
169 let blk = xGetSource x n
170 withBlk' act = withByteString blk $ \p len -> do
171 pokeCurrentBlock stream $ CurrentBlock n len p
172 when (len < xBlockSize x) $ do
173 Just src <- readMutVar $ streamSource stream
174 sourceWriteEOFKnown src True
175 act
176 go2 withBlk' eof ds
177 XD3_GOTHEADER -> go2 withBlk eof ds -- No
178 XD3_WINSTART -> go2 withBlk eof ds -- action
179 XD3_WINFINISH -> go2 withBlk eof ds -- neccessary
180 -- -- These are set for each XD3_WINFINISH return.
181 -- xd3_encoder_used_source :: Ptr Stream -> IO Bool
182 -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t
183 -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t
184 e -> do
185 s <- errorString (streamArray stream)
186 xOnError x e s
187 xInterleave x $ go id ds
188
189
190decode_input :: PrimMonad m => Stream m -> m ErrorCode
191decode_input stream =
192 unsafeIOToPrim $ xd3_decode_input (streamArrayPtr $ streamArray stream)
193
194encode_input :: PrimMonad m => Stream m -> m ErrorCode
195encode_input stream =
196 unsafeIOToPrim $ xd3_encode_input (streamArrayPtr $ streamArray stream)
197
198-- RFC 3284
199newtype VCDIFF = VCDIFF L.ByteString
200 deriving Show
201
202chunksOf :: Usize_t -> L.ByteString -> [B.ByteString]
203chunksOf len bs | L.null bs = []
204 | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs
205 in L.toStrict b : chunksOf len bs'
206
207computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF
208computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched
209
210applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString
211applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta
212
213data Result x = Result
214 { result :: x -- ^ A possibly invalid result. To consume a lazy stream with fusion, avoid
215 -- evaluating 'resultError' until this field is fully processed.
216 , resultError :: Maybe (ErrorCode,String)
217 -- ^ If something went wrong while producing 'result', this
218 -- is an error code and message indicating what.
219 } deriving (Show,Functor)
220
221instance Monoid x => Monoid (Result x) where
222 mempty = Result mempty Nothing
223 mappend (Result x xe) y = Result (mappend x $ result y) (maybe (resultError y) Just xe)
224
225xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> Result L.ByteString
226xdeltaPure codec cfg source input =
227 let bsize = chunk_size cfg
228 ds = chunksOf bsize input
229 smap = IntMap.fromList $ zip [0..] (chunksOf bsize source)
230 x :: XDeltaMethods (ST s) (Result L.ByteString)
231 x = XDeltaMethods
232 { xConfig = cfg
233 , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of
234 Nothing -> B.empty
235 Just bs -> bs
236 , xOutput = \ptr len -> unsafeIOToST $ flip Result Nothing . L.fromStrict
237 <$> B.packCStringLen (castPtr ptr,len)
238 , xOnError = \e s -> return (Result L.empty (Just (e,s)))
239 , xBlockSize = bsize
240 , xInterleave = unsafeInterleaveST
241 }
242 in runST $ xdelta x codec ds
243
244defaultConfig :: Config
245defaultConfig = Config
246 { winsize = XD3_DEFAULT_WINSIZE
247 , sprevsz = XD3_DEFAULT_SPREVSZ
248 , iopt_size = XD3_DEFAULT_IOPT_SIZE
249 , flags = mempty
250 , sec_data = CompressorConfig 0 0 0
251 , sec_inst = CompressorConfig 0 0 0
252 , sec_addr = CompressorConfig 0 0 0
253 , smatch_cfg = Right SMATCH_DEFAULT
254 , chunk_size = 4096
255 }