{-# OPTIONS_GHC -Wno-partial-type-signatures #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NondecreasingIndentation #-} {-# LANGUAGE PartialTypeSignatures #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} module Data.VCDIFF where import Control.Monad import Control.Monad.Primitive import Control.Monad.ST import Control.Monad.ST.Unsafe import Data.Bits import qualified Data.ByteString as B import qualified Data.ByteString.Unsafe as B import qualified Data.ByteString.Internal as B import qualified Data.ByteString.Lazy as L import Data.Coerce import Data.Int import qualified Data.IntMap as IntMap import Data.Monoid import Data.Primitive.Addr import Data.Primitive.ByteArray import Data.Primitive.ByteArray.Util import Data.Primitive.MutVar import Data.STRef import qualified Data.Text as T import Data.Text.Encoding import Data.Word import Foreign.C.Types import Foreign.C.String import Foreign.ForeignPtr (withForeignPtr) import Foreign.Ptr import Foreign.Concurrent import Foreign.Storable import Foreign.ForeignPtr (ForeignPtr) import GHC.Exts import GHC.TypeLits import Data.VCDIFF.Types import Data.VCDIFF.XDelta #ifndef SIZEOF_SIZE_T #define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ #define SIZEOF_UNSIGNED_INT __SIZEOF_INT__ #define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__ #define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__ #define static_assert(...) #endif #include #include "offset.h" data Stream m = Stream { streamArray :: MutableByteArray (PrimState m) , streamPtr :: ForeignPtr Xd3Stream -- ^ HACK: this is only used to attach a finalizer -- to 'streamArray'. Don't use this pointer. -- This would be unnecessary if I could create a -- MutableByteArray with a finalizer attached. , streamSource :: MutVar (PrimState m) (Maybe (Source m)) } foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Xd3Stream -> Ptr Config -> IO ErrorCode foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Xd3Stream -> IO () foreign import ccall "xdelta3.h xd3_abort_stream" xd3_abort_stream :: Ptr Xd3Stream -> IO () foreign import ccall "xdelta3.h xd3_close_stream" xd3_close_stream :: Ptr Xd3Stream -> IO ErrorCode writeCompressorConfig :: (PrimMonad m, _) => MutableByteArray (PrimState m) -> Offset j -> CompressorConfig -> m () writeCompressorConfig c o sec = do writeAtByte c (o +. #{off xd3_sec_cfg,ngroups}) (ngroups sec) writeAtByte c (o +. #{off xd3_sec_cfg,sector_size}) (sector_size sec) writeAtByte c (o +. #{off xd3_sec_cfg,inefficient}) (inefficient sec) writeMatcher :: (PrimMonad m, _) => MutableByteArray (PrimState m) -> Offset j -> StringMatcher -> m () writeMatcher c o sm = do -- handled elsewhere: const char *name; <- smName :: String writeAtByte c (o +. #{off xd3_smatcher, string_match }) (smStringMatch sm) writeAtByte c (o +. #{off xd3_smatcher, large_look }) (smLargeLook sm) writeAtByte c (o +. #{off xd3_smatcher, large_step }) (smLargeStep sm) writeAtByte c (o +. #{off xd3_smatcher, small_look }) (smSmallLook sm) writeAtByte c (o +. #{off xd3_smatcher, small_chain }) (smSmallChain sm) writeAtByte c (o +. #{off xd3_smatcher, small_lchain }) (smSmallLchain sm) writeAtByte c (o +. #{off xd3_smatcher, max_lazy }) (smMaxLazy sm) writeAtByte c (o +. #{off xd3_smatcher, long_enough }) (smLongEnough sm) -- The xd3_config structure is used to initialize a stream - all data -- is copied into stream so config may be a temporary variable. See -- the [documentation] or comments on the xd3_config structure. config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m)) config_stream cfg = do let (len,n) = case smatch_cfg cfg of Left m -> let n = encodeUtf8 $ T.pack $ smName m in ( #{const sizeof(xd3_stream)} + B.length n + 1 , n ) Right _ -> ( #{const sizeof(xd3_stream)}, B.empty ) s <- newPinnedByteArray len let sptr = ptr (mutableByteArrayContents s) :: Ptr Xd3Stream fillByteArray s 0 #{const sizeof(xd3_stream)} 0 nptr <- case smatch_cfg cfg of Right _ -> writeStringAt s #{const sizeof(xd3_stream)} n Left _ -> return nullPtr c <- do c <- newPinnedByteArray #const sizeof(xd3_config) fillByteArray c 0 #{const sizeof(xd3_config)} 0 writeAtByte c #{off xd3_config, winsize} (winsize cfg) writeAtByte c #{off xd3_config, sprevsz} (sprevsz cfg) writeAtByte c #{off xd3_config, iopt_size} (iopt_size cfg) writeAtByte c #{off xd3_config, flags} (coerce (flags cfg) :: Word32) writeCompressorConfig c #{off xd3_config, sec_data} (sec_data cfg) writeCompressorConfig c #{off xd3_config, sec_inst} (sec_inst cfg) writeCompressorConfig c #{off xd3_config, sec_addr} (sec_addr cfg) let msel :: #type xd3_smatch_cfg msel = either (const #{const XD3_SMATCH_SOFT}) (fromIntegral . fromEnum) (smatch_cfg cfg) writeAtByte c (#{off xd3_config, smatch_cfg}) msel case smatch_cfg cfg of Right _ -> return () Left matcher -> do let o = offset :: Offset #offset xd3_config,smatcher_soft writeAtByte c (o +. (#{off xd3_smatcher, name})) nptr writeMatcher c o matcher unsafeFreezeByteArray c let cptr = ptr (byteArrayContents c) :: Ptr Config srcvar <- newMutVar Nothing stream <- unsafeIOToPrim $ do let finalize = do -- freeHaskellFunPtr: aloc,free,getblk xd3_abort_stream sptr xd3_close_stream sptr xd3_free_stream sptr seq srcvar $ seq s $ return () -- Keep array s alive until the ffi functions finish. fp <- newForeignPtr sptr finalize return Stream { streamArray = s , streamPtr = fp , streamSource = srcvar } unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case XD3_SUCCESS -> return $ c `seq` Right stream ecode -> return $ Left ecode foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Xd3Stream -> Ptr Xd3Source -> IO ErrorCode set_source :: PrimMonad m => Stream m -> String -- ^ name for debug/print purposes. -> Usize_t -- ^ block size -> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size). -- Rounds up to approx 16k. -> m () set_source stream nm blksz maxwinsz = do src <- newSource nm blksz maxwinsz {- writeAtByte (streamArray stream) #{offset xd3_stream, getblk} nullPtr -- xdelta3.h documents this as an internal field. -} let strm = ptr (mutableByteArrayContents $ streamArray stream) unsafeIOToPrim (xd3_set_source strm $ sourcePtr src) writeMutVar (streamSource stream) (Just src) data XDeltaMethods m u = XDeltaMethods { xConfig :: Config , xGetSource :: Xoff_t -> B.ByteString , xOutput :: Ptr Word8 -> Int -> m u , xOnError :: ErrorCode -> String -> m u , xBlockSize :: Usize_t , xInterleave :: forall a. m a -> m a } -- -- | Checks for legal flag changes. -- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO () setFlag :: PrimMonad m => Flags -> Stream m -> Bool -> m () setFlag b stream wantFlush = do f <- readAtByte (streamArray stream) (#{off xd3_stream, flags}) writeAtByte (streamArray stream) (#{off xd3_stream, flags}) . (coerce :: Flags -> Word32) $ if wantFlush then Flags f .|. b else Flags f .&. complement b setFlush :: PrimMonad m => Stream m -> Bool -> m () setFlush = setFlag XD3_FLUSH setSkipWindow :: PrimMonad m => Stream m -> Bool -> m () setSkipWindow = setFlag XD3_SKIP_WINDOW -- -- declared static -- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO () avail_input :: PrimMonad m => Stream m -> Ptr a -> Usize_t -> m () avail_input stream p sz = do writeAtByte (streamArray stream) (#{off xd3_stream, next_in}) p writeAtByte (streamArray stream) (#{off xd3_stream, avail_in}) sz -- | This acknowledges receipt of output data, must be called after any -- XD3_OUTPUT return. -- -- declared static -- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO () nextOut :: PrimMonad m => Stream m -> ((Ptr Word8, Usize_t) -> m a) -> m a nextOut stream action = do buf <- (,) <$> readAtByte (streamArray stream) (#{off xd3_stream, next_out}) <*> readAtByte (streamArray stream) (#{off xd3_stream, avail_out}) a <- action buf -- unsafeIOToPrim $ xd3_consume_output (ptr $ mutableByteArrayContents $ streamArray stream) writeAtByte (streamArray stream) (#{off xd3_stream, avail_out}) (0 :: Usize_t) return a requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t) requestedBlockNumber stream = do msrc <- readMutVar $ streamSource stream forM msrc sourceRequestedBlocknumber -- -- declared static -- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString -- | Gives some extra information about the latest library error, if any -- is known. errorString :: PrimMonad m => Stream m -> m String errorString stream = do cstring <- readAtByte (streamArray stream) (#{off xd3_stream, msg}) if cstring /= nullPtr then unsafeIOToPrim $ peekCString cstring -- TODO: avoid unsafeIOToPrim else return "" pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m () pokeCurrentBlock stream blk = do msrc <- readMutVar $ streamSource stream forM_ msrc (`sourceWriteCurrentBlock` blk) withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a withByteString d act = let (fp,off,len) = B.toForeignPtr d in unsafeIOToPrim $ withForeignPtr fp $ \ptr -> unsafePrimToIO $ do act (ptr `plusPtr` off) (fromIntegral len) xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u xdelta x xxcode_input ds = do mstream <- config_stream (xConfig x) either (\e _ -> xOnError x e "config_stream failed") (flip ($)) mstream $ \stream -> do set_source stream "VCDIFF" (xBlockSize x) (xBlockSize x) let go withBlk [] = return mempty go withBlk (d:ds) = do let (fp,off,len) = B.toForeignPtr d eof = null ds when eof $ setFlush stream True withByteString d $ \indata len -> do avail_input stream indata len go2 withBlk eof ds go2 withBlk eof ds = do ret <- withBlk $ xxcode_input stream case ret of XD3_INPUT -> if (not eof) then go withBlk ds else return mempty XD3_OUTPUT -> do m' <- nextOut stream (\(p,len) -> xOutput x p (fromIntegral len)) ms <- xInterleave x $ go2 withBlk eof ds return $ m' <> ms XD3_GETSRCBLK -> do Just n <- requestedBlockNumber stream let blk = xGetSource x n withBlk' act = withByteString blk $ \p len -> do pokeCurrentBlock stream $ CurrentBlock n len p when (len < xBlockSize x) $ do Just src <- readMutVar $ streamSource stream sourceWriteEOFKnown src True act go2 withBlk' eof ds XD3_GOTHEADER -> go2 withBlk eof ds -- No XD3_WINSTART -> go2 withBlk eof ds -- action XD3_WINFINISH -> go2 withBlk eof ds -- neccessary -- -- These are set for each XD3_WINFINISH return. -- xd3_encoder_used_source :: Ptr Stream -> IO Bool -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t e -> do s <- errorString stream xOnError x e s xInterleave x $ go id ds foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input :: Ptr Xd3Stream -> IO ErrorCode foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input :: Ptr Xd3Stream -> IO ErrorCode decode_input :: PrimMonad m => Stream m -> m ErrorCode decode_input stream = unsafeIOToPrim $ xd3_decode_input (ptr $ mutableByteArrayContents $ streamArray stream) encode_input :: PrimMonad m => Stream m -> m ErrorCode encode_input stream = unsafeIOToPrim $ xd3_encode_input (ptr $ mutableByteArrayContents $ streamArray stream) -- RFC 3284 newtype VCDIFF = VCDIFF L.ByteString deriving Show chunksOf :: Usize_t -> L.ByteString -> [B.ByteString] chunksOf len bs | L.null bs = [] | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs in L.toStrict b : chunksOf len bs' computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta data Result x = Result { result :: x -- ^ A possibly invalid result. To consume a lazy stream with fusion, avoid -- evaluating 'resultError' until this field is fully processed. , resultError :: Maybe (ErrorCode,String) -- ^ If something went wrong while producing 'result', this -- is an error code and message indicating what. } deriving (Show,Functor) instance Monoid x => Monoid (Result x) where mempty = Result mempty Nothing mappend (Result x xe) y = Result (mappend x $ result y) (maybe (resultError y) Just xe) xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> Result L.ByteString xdeltaPure codec cfg source input = let bsize = chunk_size cfg ds = chunksOf bsize input smap = IntMap.fromList $ zip [0..] (chunksOf bsize source) x :: XDeltaMethods (ST s) (Result L.ByteString) x = XDeltaMethods { xConfig = cfg , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of Nothing -> B.empty Just bs -> bs , xOutput = \ptr len -> unsafeIOToST $ flip Result Nothing . L.fromStrict <$> B.packCStringLen (castPtr ptr,len) , xOnError = \e s -> return (Result L.empty (Just (e,s))) , xBlockSize = bsize , xInterleave = unsafeInterleaveST } in runST $ xdelta x codec ds defaultConfig :: Config defaultConfig = Config { winsize = XD3_DEFAULT_WINSIZE , sprevsz = XD3_DEFAULT_SPREVSZ , iopt_size = XD3_DEFAULT_IOPT_SIZE , flags = mempty , sec_data = CompressorConfig 0 0 0 , sec_inst = CompressorConfig 0 0 0 , sec_addr = CompressorConfig 0 0 0 , smatch_cfg = Right SMATCH_DEFAULT , chunk_size = 4096 }