diff options
author | Joe Crayne <joe@jerkface.net> | 2018-10-27 16:26:10 -0400 |
---|---|---|
committer | Joe Crayne <joe@jerkface.net> | 2018-10-27 16:26:10 -0400 |
commit | 4aab5a236e578f3cd97566bc142027e06e955f73 (patch) | |
tree | 9aeba26a693c8d17dfc66d6d98b2209a0be6b8b6 /haskell/Data/VCDIFF.hs | |
parent | 2d01ddf9bffb7be441e2cf1c7071240148839ab5 (diff) |
build fix
Diffstat (limited to 'haskell/Data/VCDIFF.hs')
-rw-r--r-- | haskell/Data/VCDIFF.hs | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/haskell/Data/VCDIFF.hs b/haskell/Data/VCDIFF.hs new file mode 100644 index 0000000..a776052 --- /dev/null +++ b/haskell/Data/VCDIFF.hs | |||
@@ -0,0 +1,255 @@ | |||
1 | {-# LANGUAGE BangPatterns #-} | ||
2 | {-# LANGUAGE DataKinds #-} | ||
3 | {-# LANGUAGE DeriveFunctor #-} | ||
4 | {-# LANGUAGE FlexibleContexts #-} | ||
5 | {-# LANGUAGE FlexibleInstances #-} | ||
6 | {-# LANGUAGE GADTs #-} | ||
7 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} | ||
8 | {-# LANGUAGE LambdaCase #-} | ||
9 | {-# LANGUAGE NondecreasingIndentation #-} | ||
10 | {-# LANGUAGE PatternSynonyms #-} | ||
11 | {-# LANGUAGE RankNTypes #-} | ||
12 | {-# LANGUAGE TypeFamilies #-} | ||
13 | {-# LANGUAGE TypeOperators #-} | ||
14 | module Data.VCDIFF where | ||
15 | |||
16 | import Control.Monad | ||
17 | import Control.Monad.Primitive | ||
18 | import Control.Monad.ST | ||
19 | import Control.Monad.ST.Unsafe | ||
20 | import Data.Bits | ||
21 | import qualified Data.ByteString as B | ||
22 | import qualified Data.ByteString.Unsafe as B | ||
23 | import qualified Data.ByteString.Internal as B | ||
24 | import qualified Data.ByteString.Lazy as L | ||
25 | import Data.Coerce | ||
26 | import Data.Int | ||
27 | import qualified Data.IntMap as IntMap | ||
28 | import Data.Monoid | ||
29 | import Data.Primitive.Addr | ||
30 | import Data.Primitive.ByteArray | ||
31 | import Data.Primitive.ByteArray.Util | ||
32 | import Data.Primitive.MutVar | ||
33 | import Data.STRef | ||
34 | import qualified Data.Text as T | ||
35 | import Data.Text.Encoding | ||
36 | import Data.Word | ||
37 | import Foreign.C.Types | ||
38 | import Foreign.C.String | ||
39 | import Foreign.ForeignPtr (withForeignPtr) | ||
40 | import Foreign.Ptr | ||
41 | import Foreign.Concurrent | ||
42 | import Foreign.Storable | ||
43 | import Foreign.ForeignPtr (ForeignPtr) | ||
44 | import GHC.Exts | ||
45 | import GHC.TypeLits | ||
46 | |||
47 | import Data.VCDIFF.Types | ||
48 | import Data.VCDIFF.XDelta | ||
49 | |||
50 | data Stream m = Stream | ||
51 | { streamArray :: StreamArray m | ||
52 | , streamPtr :: ForeignPtr Xd3Stream -- ^ HACK: this is only used to attach a finalizer | ||
53 | -- to 'streamArray'. Don't use this pointer. | ||
54 | -- This would be unnecessary if I could create a | ||
55 | -- MutableByteArray with a finalizer attached. | ||
56 | , streamSource :: MutVar (PrimState m) (Maybe (Source m)) | ||
57 | } | ||
58 | |||
59 | |||
60 | -- The xd3_config structure is used to initialize a stream - all data | ||
61 | -- is copied into stream so config may be a temporary variable. See | ||
62 | -- the [documentation] or comments on the xd3_config structure. | ||
63 | config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m)) | ||
64 | config_stream cfg = do | ||
65 | (s,nptr) <- newStreamArray (either (Just . smName) (const Nothing) (smatch_cfg cfg)) | ||
66 | c <- packConfig nptr cfg | ||
67 | let cptr = ptr (byteArrayContents c) :: Ptr Config | ||
68 | sptr = streamArrayPtr s | ||
69 | srcvar <- newMutVar Nothing | ||
70 | stream <- unsafeIOToPrim $ do | ||
71 | let finalize = do | ||
72 | -- freeHaskellFunPtr: aloc,free,getblk | ||
73 | xd3_abort_stream sptr | ||
74 | xd3_close_stream sptr | ||
75 | xd3_free_stream sptr | ||
76 | seq srcvar $ seq s $ return () -- Keep array s alive until the ffi functions finish. | ||
77 | fp <- newForeignPtr sptr finalize | ||
78 | return Stream | ||
79 | { streamArray = s | ||
80 | , streamPtr = fp | ||
81 | , streamSource = srcvar | ||
82 | } | ||
83 | unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case | ||
84 | XD3_SUCCESS -> return $ c `seq` Right stream | ||
85 | ecode -> return $ Left ecode | ||
86 | |||
87 | |||
88 | set_source :: PrimMonad m => | ||
89 | Stream m -> String -- ^ name for debug/print purposes. | ||
90 | -> Usize_t -- ^ block size | ||
91 | -> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size). | ||
92 | -- Rounds up to approx 16k. | ||
93 | -> m () | ||
94 | set_source stream nm blksz maxwinsz = do | ||
95 | src <- newSource nm blksz maxwinsz | ||
96 | {- | ||
97 | writeAtByte (streamArray stream) | ||
98 | #{offset xd3_stream, getblk} | ||
99 | nullPtr -- xdelta3.h documents this as an internal field. | ||
100 | -} | ||
101 | let strm = streamArrayPtr $ streamArray stream | ||
102 | unsafeIOToPrim (xd3_set_source strm $ sourcePtr src) | ||
103 | writeMutVar (streamSource stream) (Just src) | ||
104 | |||
105 | data XDeltaMethods m u = XDeltaMethods | ||
106 | { xConfig :: Config | ||
107 | , xGetSource :: Xoff_t -> B.ByteString | ||
108 | , xOutput :: Ptr Word8 -> Int -> m u | ||
109 | , xOnError :: ErrorCode -> String -> m u | ||
110 | , xBlockSize :: Usize_t | ||
111 | , xInterleave :: forall a. m a -> m a | ||
112 | } | ||
113 | |||
114 | -- -- | Checks for legal flag changes. | ||
115 | -- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO () | ||
116 | |||
117 | -- -- declared static | ||
118 | -- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO () | ||
119 | |||
120 | -- | This acknowledges receipt of output data, must be called after any | ||
121 | -- XD3_OUTPUT return. | ||
122 | -- -- declared static | ||
123 | -- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO () | ||
124 | |||
125 | requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t) | ||
126 | requestedBlockNumber stream = do | ||
127 | msrc <- readMutVar $ streamSource stream | ||
128 | forM msrc sourceRequestedBlocknumber | ||
129 | |||
130 | -- -- declared static | ||
131 | -- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString | ||
132 | |||
133 | pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m () | ||
134 | pokeCurrentBlock stream blk = do | ||
135 | msrc <- readMutVar $ streamSource stream | ||
136 | forM_ msrc (`sourceWriteCurrentBlock` blk) | ||
137 | |||
138 | withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a | ||
139 | withByteString d act = | ||
140 | let (fp,off,len) = B.toForeignPtr d | ||
141 | in unsafeIOToPrim $ withForeignPtr fp $ \ptr -> unsafePrimToIO $ do | ||
142 | act (ptr `plusPtr` off) (fromIntegral len) | ||
143 | |||
144 | xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u | ||
145 | xdelta x xxcode_input ds = do | ||
146 | mstream <- config_stream (xConfig x) | ||
147 | either (\e _ -> xOnError x e "config_stream failed") | ||
148 | (flip ($)) | ||
149 | mstream $ \stream -> do | ||
150 | set_source stream "VCDIFF" (xBlockSize x) (xBlockSize x) | ||
151 | let go withBlk [] = return mempty | ||
152 | go withBlk (d:ds) = do | ||
153 | let (fp,off,len) = B.toForeignPtr d | ||
154 | eof = null ds | ||
155 | when eof $ setFlush (streamArray stream) True | ||
156 | withByteString d $ \indata len -> do | ||
157 | avail_input (streamArray stream) indata len | ||
158 | go2 withBlk eof ds | ||
159 | go2 withBlk eof ds = do | ||
160 | ret <- withBlk $ xxcode_input stream | ||
161 | case ret of | ||
162 | XD3_INPUT -> if (not eof) then go withBlk ds else return mempty | ||
163 | XD3_OUTPUT -> do | ||
164 | m' <- nextOut (streamArray stream) (\(p,len) -> xOutput x p (fromIntegral len)) | ||
165 | ms <- xInterleave x $ go2 withBlk eof ds | ||
166 | return $ m' <> ms | ||
167 | XD3_GETSRCBLK -> do | ||
168 | Just n <- requestedBlockNumber stream | ||
169 | let blk = xGetSource x n | ||
170 | withBlk' act = withByteString blk $ \p len -> do | ||
171 | pokeCurrentBlock stream $ CurrentBlock n len p | ||
172 | when (len < xBlockSize x) $ do | ||
173 | Just src <- readMutVar $ streamSource stream | ||
174 | sourceWriteEOFKnown src True | ||
175 | act | ||
176 | go2 withBlk' eof ds | ||
177 | XD3_GOTHEADER -> go2 withBlk eof ds -- No | ||
178 | XD3_WINSTART -> go2 withBlk eof ds -- action | ||
179 | XD3_WINFINISH -> go2 withBlk eof ds -- neccessary | ||
180 | -- -- These are set for each XD3_WINFINISH return. | ||
181 | -- xd3_encoder_used_source :: Ptr Stream -> IO Bool | ||
182 | -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t | ||
183 | -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t | ||
184 | e -> do | ||
185 | s <- errorString (streamArray stream) | ||
186 | xOnError x e s | ||
187 | xInterleave x $ go id ds | ||
188 | |||
189 | |||
190 | decode_input :: PrimMonad m => Stream m -> m ErrorCode | ||
191 | decode_input stream = | ||
192 | unsafeIOToPrim $ xd3_decode_input (streamArrayPtr $ streamArray stream) | ||
193 | |||
194 | encode_input :: PrimMonad m => Stream m -> m ErrorCode | ||
195 | encode_input stream = | ||
196 | unsafeIOToPrim $ xd3_encode_input (streamArrayPtr $ streamArray stream) | ||
197 | |||
198 | -- RFC 3284 | ||
199 | newtype VCDIFF = VCDIFF L.ByteString | ||
200 | deriving Show | ||
201 | |||
202 | chunksOf :: Usize_t -> L.ByteString -> [B.ByteString] | ||
203 | chunksOf len bs | L.null bs = [] | ||
204 | | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs | ||
205 | in L.toStrict b : chunksOf len bs' | ||
206 | |||
207 | computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF | ||
208 | computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched | ||
209 | |||
210 | applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString | ||
211 | applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta | ||
212 | |||
213 | data Result x = Result | ||
214 | { result :: x -- ^ A possibly invalid result. To consume a lazy stream with fusion, avoid | ||
215 | -- evaluating 'resultError' until this field is fully processed. | ||
216 | , resultError :: Maybe (ErrorCode,String) | ||
217 | -- ^ If something went wrong while producing 'result', this | ||
218 | -- is an error code and message indicating what. | ||
219 | } deriving (Show,Functor) | ||
220 | |||
221 | instance Monoid x => Monoid (Result x) where | ||
222 | mempty = Result mempty Nothing | ||
223 | mappend (Result x xe) y = Result (mappend x $ result y) (maybe (resultError y) Just xe) | ||
224 | |||
225 | xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> Result L.ByteString | ||
226 | xdeltaPure codec cfg source input = | ||
227 | let bsize = chunk_size cfg | ||
228 | ds = chunksOf bsize input | ||
229 | smap = IntMap.fromList $ zip [0..] (chunksOf bsize source) | ||
230 | x :: XDeltaMethods (ST s) (Result L.ByteString) | ||
231 | x = XDeltaMethods | ||
232 | { xConfig = cfg | ||
233 | , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of | ||
234 | Nothing -> B.empty | ||
235 | Just bs -> bs | ||
236 | , xOutput = \ptr len -> unsafeIOToST $ flip Result Nothing . L.fromStrict | ||
237 | <$> B.packCStringLen (castPtr ptr,len) | ||
238 | , xOnError = \e s -> return (Result L.empty (Just (e,s))) | ||
239 | , xBlockSize = bsize | ||
240 | , xInterleave = unsafeInterleaveST | ||
241 | } | ||
242 | in runST $ xdelta x codec ds | ||
243 | |||
244 | defaultConfig :: Config | ||
245 | defaultConfig = Config | ||
246 | { winsize = XD3_DEFAULT_WINSIZE | ||
247 | , sprevsz = XD3_DEFAULT_SPREVSZ | ||
248 | , iopt_size = XD3_DEFAULT_IOPT_SIZE | ||
249 | , flags = mempty | ||
250 | , sec_data = CompressorConfig 0 0 0 | ||
251 | , sec_inst = CompressorConfig 0 0 0 | ||
252 | , sec_addr = CompressorConfig 0 0 0 | ||
253 | , smatch_cfg = Right SMATCH_DEFAULT | ||
254 | , chunk_size = 4096 | ||
255 | } | ||