summaryrefslogtreecommitdiff
path: root/haskell/Data/VCDIFF.hsc
diff options
context:
space:
mode:
Diffstat (limited to 'haskell/Data/VCDIFF.hsc')
-rw-r--r--haskell/Data/VCDIFF.hsc406
1 files changed, 406 insertions, 0 deletions
diff --git a/haskell/Data/VCDIFF.hsc b/haskell/Data/VCDIFF.hsc
new file mode 100644
index 0000000..5e484e1
--- /dev/null
+++ b/haskell/Data/VCDIFF.hsc
@@ -0,0 +1,406 @@
1{-# OPTIONS_GHC -Wno-partial-type-signatures #-}
2{-# LANGUAGE BangPatterns #-}
3{-# LANGUAGE DataKinds #-}
4{-# LANGUAGE DeriveFunctor #-}
5{-# LANGUAGE FlexibleContexts #-}
6{-# LANGUAGE FlexibleInstances #-}
7{-# LANGUAGE GADTs #-}
8{-# LANGUAGE GeneralizedNewtypeDeriving #-}
9{-# LANGUAGE LambdaCase #-}
10{-# LANGUAGE NondecreasingIndentation #-}
11{-# LANGUAGE PartialTypeSignatures #-}
12{-# LANGUAGE PatternSynonyms #-}
13{-# LANGUAGE RankNTypes #-}
14{-# LANGUAGE TypeFamilies #-}
15{-# LANGUAGE TypeOperators #-}
16module Data.VCDIFF where
17
18import Control.Monad
19import Control.Monad.Primitive
20import Control.Monad.ST
21import Control.Monad.ST.Unsafe
22import Data.Bits
23import qualified Data.ByteString as B
24import qualified Data.ByteString.Unsafe as B
25import qualified Data.ByteString.Internal as B
26import qualified Data.ByteString.Lazy as L
27import Data.Coerce
28import Data.Int
29import qualified Data.IntMap as IntMap
30import Data.Monoid
31import Data.Primitive.Addr
32import Data.Primitive.ByteArray
33import Data.Primitive.ByteArray.Util
34import Data.Primitive.MutVar
35import Data.STRef
36import qualified Data.Text as T
37import Data.Text.Encoding
38import Data.Word
39import Foreign.C.Types
40import Foreign.C.String
41import Foreign.ForeignPtr (withForeignPtr)
42import Foreign.Ptr
43import Foreign.Concurrent
44import Foreign.Storable
45import Foreign.ForeignPtr (ForeignPtr)
46import GHC.Exts
47import GHC.TypeLits
48
49import Data.VCDIFF.Types
50
51#ifndef SIZEOF_SIZE_T
52#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__
53#define SIZEOF_UNSIGNED_INT __SIZEOF_INT__
54#define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__
55#define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__
56#define static_assert(...)
57#endif
58#include <xdelta3.h>
59
60#include "offset.h"
61
62data Stream m = Stream
63 { streamArray :: MutableByteArray (PrimState m)
64 , streamPtr :: ForeignPtr Xd3Stream -- ^ HACK: this is only used to attach a finalizer
65 -- to 'streamArray'. Don't use this pointer.
66 -- This would be unnecessary if I could create a
67 -- MutableByteArray with a finalizer attached.
68 , streamSource :: MutVar (PrimState m) (Maybe (MutableByteArray (PrimState m)))
69 }
70
71foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Xd3Stream -> Ptr Config -> IO ErrorCode
72
73foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Xd3Stream -> IO ()
74
75foreign import ccall "xdelta3.h xd3_abort_stream" xd3_abort_stream :: Ptr Xd3Stream -> IO ()
76
77foreign import ccall "xdelta3.h xd3_close_stream" xd3_close_stream :: Ptr Xd3Stream -> IO ErrorCode
78
79type instance SizeOf Usize_t = #const sizeof(usize_t)
80type instance SizeOf (FunPtr a) = #const sizeof(void(*)())
81type instance SizeOf (Ptr a) = #const sizeof(void*)
82type instance SizeOf #{type int} = #const sizeof(int)
83type instance SizeOf #{type unsigned int} = #const sizeof(unsigned int)
84
85
86
87writeCompressorConfig :: (PrimMonad m, _) => MutableByteArray (PrimState m) -> Offset j -> CompressorConfig -> m ()
88writeCompressorConfig c o sec = do
89 writeAtByte c (o +. #{off xd3_sec_cfg,ngroups}) (ngroups sec)
90 writeAtByte c (o +. #{off xd3_sec_cfg,sector_size}) (sector_size sec)
91 writeAtByte c (o +. #{off xd3_sec_cfg,inefficient}) (inefficient sec)
92
93writeMatcher :: (PrimMonad m, _) => MutableByteArray (PrimState m) -> Offset j -> StringMatcher -> m ()
94writeMatcher c o sm = do
95 -- handled elsewhere: const char *name; <- smName :: String
96 writeAtByte c (o +. #{off xd3_smatcher, string_match }) (smStringMatch sm)
97 writeAtByte c (o +. #{off xd3_smatcher, large_look }) (smLargeLook sm)
98 writeAtByte c (o +. #{off xd3_smatcher, large_step }) (smLargeStep sm)
99 writeAtByte c (o +. #{off xd3_smatcher, small_look }) (smSmallLook sm)
100 writeAtByte c (o +. #{off xd3_smatcher, small_chain }) (smSmallChain sm)
101 writeAtByte c (o +. #{off xd3_smatcher, small_lchain }) (smSmallLchain sm)
102 writeAtByte c (o +. #{off xd3_smatcher, max_lazy }) (smMaxLazy sm)
103 writeAtByte c (o +. #{off xd3_smatcher, long_enough }) (smLongEnough sm)
104
105ptr :: Addr -> Ptr a
106ptr (Addr a) = Ptr a
107
108adr :: Ptr a -> Addr
109adr (Ptr a) = Addr a
110
111-- The xd3_config structure is used to initialize a stream - all data
112-- is copied into stream so config may be a temporary variable. See
113-- the [documentation] or comments on the xd3_config structure.
114config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m))
115config_stream cfg = do
116 let (len,n) = case smatch_cfg cfg of
117 Left m -> let n = encodeUtf8 $ T.pack $ smName m
118 in ( #{const sizeof(xd3_stream)} + B.length n + 1
119 , n )
120 Right _ -> ( #{const sizeof(xd3_stream)}, B.empty )
121 s <- newPinnedByteArray len
122 let sptr = ptr (mutableByteArrayContents s) :: Ptr Xd3Stream
123 fillByteArray s 0 #{const sizeof(xd3_stream)} 0
124 nptr <- case smatch_cfg cfg of
125 Right _ -> writeStringAt s #{const sizeof(xd3_stream)} n
126 Left _ -> return nullPtr
127 c <- do
128 c <- newPinnedByteArray #const sizeof(xd3_config)
129 fillByteArray c 0 #{const sizeof(xd3_config)} 0
130 writeAtByte c #{off xd3_config, winsize} (winsize cfg)
131 writeAtByte c #{off xd3_config, sprevsz} (sprevsz cfg)
132 writeAtByte c #{off xd3_config, iopt_size} (iopt_size cfg)
133 writeAtByte c #{off xd3_config, flags} (coerce (flags cfg) :: Word32)
134 writeCompressorConfig c #{off xd3_config, sec_data} (sec_data cfg)
135 writeCompressorConfig c #{off xd3_config, sec_inst} (sec_inst cfg)
136 writeCompressorConfig c #{off xd3_config, sec_addr} (sec_addr cfg)
137 let msel :: #type xd3_smatch_cfg
138 msel = either (const #{const XD3_SMATCH_SOFT})
139 (fromIntegral . fromEnum)
140 (smatch_cfg cfg)
141 writeAtByte c (#{off xd3_config, smatch_cfg}) msel
142 case smatch_cfg cfg of
143 Right _ -> return ()
144 Left matcher -> do
145 let o = offset :: Offset #offset xd3_config,smatcher_soft
146 writeAtByte c (o +. (#{off xd3_smatcher, name})) nptr
147 writeMatcher c o matcher
148 unsafeFreezeByteArray c
149 let cptr = ptr (byteArrayContents c) :: Ptr Config
150 srcvar <- newMutVar Nothing
151 stream <- unsafeIOToPrim $ do
152 let finalize = do
153 -- freeHaskellFunPtr: aloc,free,getblk
154 xd3_abort_stream sptr
155 xd3_close_stream sptr
156 xd3_free_stream sptr
157 seq srcvar $ seq s $ return () -- Keep array s alive until the ffi functions finish.
158 fp <- newForeignPtr sptr finalize
159 return Stream
160 { streamArray = s
161 , streamPtr = fp
162 , streamSource = srcvar
163 }
164 unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case
165 XD3_SUCCESS -> return $ c `seq` Right stream
166 ecode -> return $ Left ecode
167
168writeStringAt :: PrimMonad m => MutableByteArray s -> Int -> B.ByteString -> m (Ptr a)
169writeStringAt src o bsname = do
170 (p,cnt) <- unsafeIOToPrim $ B.unsafeUseAsCStringLen bsname return
171 let nptr = ptr (mutableByteArrayContents src) `plusPtr` o
172 copyAddr (adr nptr) (adr p) cnt
173 writeOffAddr (adr nptr) cnt (0 :: Word8)
174 return nptr
175
176data Xd3Source
177
178foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Xd3Stream -> Ptr Xd3Source -> IO ErrorCode
179
180set_source :: PrimMonad m =>
181 Stream m -> String -- ^ name for debug/print purposes.
182 -> Usize_t -- ^ block size
183 -> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size).
184 -- Rounds up to approx 16k.
185 -> m ()
186set_source stream nm blksz maxwinsz = do
187 let bsname = encodeUtf8 $ T.pack nm
188 src <- newPinnedByteArray $ 1 + B.length bsname + #{const sizeof(xd3_source)}
189 nptr <- writeStringAt src #{const sizeof(xd3_source)} bsname
190 writeAtByte src (#{off xd3_source, blksize }) blksz
191 writeAtByte src (#{off xd3_source, name }) nptr
192 writeAtByte src (#{off xd3_source, max_winsize}) maxwinsz
193 writeAtByte src (#{off xd3_source, curblkno }) (maxBound :: Xoff_t)
194 {-
195 writeAtByte (streamArray stream)
196 #{offset xd3_stream, getblk}
197 nullPtr -- xdelta3.h documents this as an internal field.
198 -}
199 let strm = ptr (mutableByteArrayContents $ streamArray stream)
200 srcptr = ptr (mutableByteArrayContents src)
201 unsafeIOToPrim (xd3_set_source strm srcptr)
202 writeMutVar (streamSource stream) (Just src)
203
204data XDeltaMethods m u = XDeltaMethods
205 { xConfig :: Config
206 , xGetSource :: Xoff_t -> B.ByteString
207 , xOutput :: Ptr Word8 -> Int -> m u
208 , xOnError :: ErrorCode -> String -> m u
209 , xBlockSize :: Usize_t
210 , xInterleave :: forall a. m a -> m a
211 }
212
213-- -- | Checks for legal flag changes.
214-- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO ()
215
216setFlag :: PrimMonad m => Flags -> Stream m -> Bool -> m ()
217setFlag b stream wantFlush = do
218 f <- readAtByte (streamArray stream) (#{off xd3_stream, flags})
219 writeAtByte (streamArray stream) (#{off xd3_stream, flags})
220 . (coerce :: Flags -> Word32)
221 $ if wantFlush then Flags f .|. b
222 else Flags f .&. complement b
223
224setFlush :: PrimMonad m => Stream m -> Bool -> m ()
225setFlush = setFlag XD3_FLUSH
226
227setSkipWindow :: PrimMonad m => Stream m -> Bool -> m ()
228setSkipWindow = setFlag XD3_SKIP_WINDOW
229
230-- -- declared static
231-- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO ()
232
233avail_input :: PrimMonad m => Stream m -> Ptr a -> Usize_t -> m ()
234avail_input stream p sz = do
235 writeAtByte (streamArray stream) (#{off xd3_stream, next_in}) p
236 writeAtByte (streamArray stream) (#{off xd3_stream, avail_in}) sz
237
238-- | This acknowledges receipt of output data, must be called after any
239-- XD3_OUTPUT return.
240-- -- declared static
241-- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO ()
242
243nextOut :: PrimMonad m => Stream m -> ((Ptr Word8, Usize_t) -> m a) -> m a
244nextOut stream action = do
245 buf <- (,)
246 <$> readAtByte (streamArray stream) (#{off xd3_stream, next_out})
247 <*> readAtByte (streamArray stream) (#{off xd3_stream, avail_out})
248 a <- action buf
249 -- unsafeIOToPrim $ xd3_consume_output (ptr $ mutableByteArrayContents $ streamArray stream)
250 writeAtByte (streamArray stream) (#{off xd3_stream, avail_out}) (0 :: Usize_t)
251 return a
252
253
254requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t)
255requestedBlockNumber stream = do
256 msrc <- readMutVar $ streamSource stream
257 forM msrc $ \src -> readAtByte src (#{off xd3_source, getblkno})
258
259data CurrentBlock = CurrentBlock
260 { blkno :: !Xoff_t -- ^ current block number
261 , blkSize :: !Usize_t -- ^ number of bytes on current block: must be >= 0 and <= 'srcBlockSize'
262 , blkPtr :: !(Ptr Word8) -- ^ current block array
263 }
264
265-- -- declared static
266-- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString
267
268-- | Gives some extra information about the latest library error, if any
269-- is known.
270errorString :: PrimMonad m => Stream m -> m String
271errorString stream = do
272 cstring <- readAtByte (streamArray stream) (#{off xd3_stream, msg})
273 if cstring /= nullPtr
274 then unsafeIOToPrim $ peekCString cstring -- TODO: avoid unsafeIOToPrim
275 else return ""
276
277pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m ()
278pokeCurrentBlock stream (CurrentBlock no sz ptr) = do
279 msrc <- readMutVar $ streamSource stream
280 forM_ msrc $ \src -> do
281 writeAtByte src (#{off xd3_source, curblkno}) no
282 writeAtByte src (#{off xd3_source, onblk}) sz
283 writeAtByte src (#{off xd3_source, curblk}) ptr
284
285
286withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a
287withByteString d act =
288 let (fp,off,len) = B.toForeignPtr d
289 in unsafeIOToPrim $ withForeignPtr fp $ \ptr -> unsafePrimToIO $ do
290 act (ptr `plusPtr` off) (fromIntegral len)
291
292xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u
293xdelta x xxcode_input ds = do
294 mstream <- config_stream (xConfig x)
295 either (\e _ -> xOnError x e "config_stream failed")
296 (flip ($))
297 mstream $ \stream -> do
298 set_source stream "VCDIFF" (xBlockSize x) (xBlockSize x)
299 let go withBlk [] = return mempty
300 go withBlk (d:ds) = do
301 let (fp,off,len) = B.toForeignPtr d
302 eof = null ds
303 when eof $ setFlush stream True
304 withByteString d $ \indata len -> do
305 avail_input stream indata len
306 go2 withBlk eof ds
307 go2 withBlk eof ds = do
308 ret <- withBlk $ xxcode_input stream
309 case ret of
310 XD3_INPUT -> if (not eof) then go withBlk ds else return mempty
311 XD3_OUTPUT -> do
312 m' <- nextOut stream (\(p,len) -> xOutput x p (fromIntegral len))
313 ms <- xInterleave x $ go2 withBlk eof ds
314 return $ m' <> ms
315 XD3_GETSRCBLK -> do
316 Just n <- requestedBlockNumber stream
317 let blk = xGetSource x n
318 withBlk' act = withByteString blk $ \p len -> do
319 pokeCurrentBlock stream $ CurrentBlock n len p
320 when (len < xBlockSize x) $ do
321 Just src <- readMutVar $ streamSource stream
322 writeAtByte src (#{off xd3_source, eof_known}) (1 :: #{type int})
323 act
324 go2 withBlk' eof ds
325 XD3_GOTHEADER -> go2 withBlk eof ds -- No
326 XD3_WINSTART -> go2 withBlk eof ds -- action
327 XD3_WINFINISH -> go2 withBlk eof ds -- neccessary
328 -- -- These are set for each XD3_WINFINISH return.
329 -- xd3_encoder_used_source :: Ptr Stream -> IO Bool
330 -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t
331 -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t
332 e -> do
333 s <- errorString stream
334 xOnError x e s
335 xInterleave x $ go id ds
336
337
338foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input :: Ptr Xd3Stream -> IO ErrorCode
339foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input :: Ptr Xd3Stream -> IO ErrorCode
340
341decode_input :: PrimMonad m => Stream m -> m ErrorCode
342decode_input stream =
343 unsafeIOToPrim $ xd3_decode_input (ptr $ mutableByteArrayContents $ streamArray stream)
344
345encode_input :: PrimMonad m => Stream m -> m ErrorCode
346encode_input stream =
347 unsafeIOToPrim $ xd3_encode_input (ptr $ mutableByteArrayContents $ streamArray stream)
348
349-- RFC 3284
350newtype VCDIFF = VCDIFF L.ByteString
351 deriving Show
352
353chunksOf :: Usize_t -> L.ByteString -> [B.ByteString]
354chunksOf len bs | L.null bs = []
355 | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs
356 in L.toStrict b : chunksOf len bs'
357
358computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF
359computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched
360
361applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString
362applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta
363
364data Result x = Result
365 { result :: x -- ^ A possibly invalid result. To consume a lazy stream with fusion, avoid
366 -- evaluating 'resultError' until this field is fully processed.
367 , resultError :: Maybe (ErrorCode,String)
368 -- ^ If something went wrong while producing 'result', this
369 -- is an error code and message indicating what.
370 } deriving (Show,Functor)
371
372instance Monoid x => Monoid (Result x) where
373 mempty = Result mempty Nothing
374 mappend (Result x xe) y = Result (mappend x $ result y) (maybe (resultError y) Just xe)
375
376xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> Result L.ByteString
377xdeltaPure codec cfg source input =
378 let bsize = chunk_size cfg
379 ds = chunksOf bsize input
380 smap = IntMap.fromList $ zip [0..] (chunksOf bsize source)
381 x :: XDeltaMethods (ST s) (Result L.ByteString)
382 x = XDeltaMethods
383 { xConfig = cfg
384 , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of
385 Nothing -> B.empty
386 Just bs -> bs
387 , xOutput = \ptr len -> unsafeIOToST $ flip Result Nothing . L.fromStrict
388 <$> B.packCStringLen (castPtr ptr,len)
389 , xOnError = \e s -> return (Result L.empty (Just (e,s)))
390 , xBlockSize = bsize
391 , xInterleave = unsafeInterleaveST
392 }
393 in runST $ xdelta x codec ds
394
395defaultConfig :: Config
396defaultConfig = Config
397 { winsize = XD3_DEFAULT_WINSIZE
398 , sprevsz = XD3_DEFAULT_SPREVSZ
399 , iopt_size = XD3_DEFAULT_IOPT_SIZE
400 , flags = mempty
401 , sec_data = CompressorConfig 0 0 0
402 , sec_inst = CompressorConfig 0 0 0
403 , sec_addr = CompressorConfig 0 0 0
404 , smatch_cfg = Right SMATCH_DEFAULT
405 , chunk_size = 4096
406 }