{-# LANGUAGE BangPatterns #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NondecreasingIndentation #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} -- | Create and apply binary diffs (in the 'VCDIFF' format) to lazy bytestrings. module Data.VCDIFF ( VCDIFF , encodeVCDIFF , decodeVCDIFF , Config(..) , defaultConfig , Flags , pattern XD3_ADLER32 , Result(..) , computeDiff , applyPatch) where import Control.Monad import Control.Monad.Primitive import Control.Monad.ST import Control.Monad.ST.Unsafe import Data.Bits import qualified Data.ByteString as B import qualified Data.ByteString.Unsafe as B import qualified Data.ByteString.Internal as B import qualified Data.ByteString.Lazy as L import Data.Coerce import Data.Int import qualified Data.IntMap as IntMap import Data.Monoid import Data.Primitive.Addr import Data.Primitive.ByteArray import Data.Primitive.ByteArray.Util import Data.Primitive.MutVar import Data.STRef import qualified Data.Text as T import Data.Text.Encoding import Data.Word import Foreign.C.Types import Foreign.C.String import Foreign.ForeignPtr (withForeignPtr,touchForeignPtr) import Foreign.Ptr import Foreign.Concurrent import Foreign.Storable import Foreign.ForeignPtr (ForeignPtr) import GHC.Exts import GHC.TypeLits import Data.VCDIFF.Types import Data.VCDIFF.XDelta data Stream m = Stream { streamArray :: StreamArray m , streamPtr :: ForeignPtr Xd3Stream -- ^ HACK: this is only used to attach a finalizer -- to 'streamArray'. Don't use this pointer. -- This would be unnecessary if I could create a -- MutableByteArray with a finalizer attached. , streamSource :: MutVar (PrimState m) (Maybe (Source m)) } keepAlive srcvar s = do seq srcvar $ seq s $ return () -- Keep array s alive until the ffi functions finish. {-# NOINLINE keepAlive #-} -- The xd3_config structure is used to initialize a stream - all data -- is copied into stream so config may be a temporary variable. See -- the [documentation] or comments on the xd3_config structure. config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m)) config_stream cfg = do (s,nptr) <- newStreamArray (either (Just . smName) (const Nothing) (smatch_cfg cfg)) c <- packConfig nptr cfg let cptr = ptr (byteArrayContents c) :: Ptr Config sptr = streamArrayPtr s srcvar <- newMutVar Nothing stream <- unsafeIOToPrim $ do let finalize = do -- freeHaskellFunPtr: aloc,free,getblk xd3_abort_stream sptr xd3_close_stream sptr xd3_free_stream sptr keepAlive srcvar s fp <- newForeignPtr sptr finalize return Stream { streamArray = s , streamPtr = fp , streamSource = srcvar } unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case XD3_SUCCESS -> return $ c `seq` Right stream ecode -> return $ Left ecode set_source :: PrimMonad m => Stream m -> String -- ^ name for debug/print purposes. -> Usize_t -- ^ block size -> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size). -- Rounds up to approx 16k. -> m () set_source stream nm blksz maxwinsz = do src <- newSource nm blksz maxwinsz {- writeAtByte (streamArray stream) #{offset xd3_stream, getblk} nullPtr -- xdelta3.h documents this as an internal field. -} let strm = streamArrayPtr $ streamArray stream unsafeIOToPrim (xd3_set_source strm $ sourcePtr src) writeMutVar (streamSource stream) (Just src) data XDeltaMethods m u = XDeltaMethods { xConfig :: Config , xGetSource :: Xoff_t -> B.ByteString , xOutput :: Ptr Word8 -> Int -> m u , xOnError :: ErrorCode -> String -> m u , xBlockSize :: Usize_t , xInterleave :: forall a. m a -> m a } -- -- | Checks for legal flag changes. -- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO () -- -- declared static -- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO () -- | This acknowledges receipt of output data, must be called after any -- XD3_OUTPUT return. -- -- declared static -- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO () requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t) requestedBlockNumber stream = do msrc <- readMutVar $ streamSource stream forM msrc sourceRequestedBlocknumber -- -- declared static -- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m () pokeCurrentBlock stream blk = do msrc <- readMutVar $ streamSource stream forM_ msrc (`sourceWriteCurrentBlock` blk) withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a withByteString d act = let (fp,off,len) = B.toForeignPtr d in do ptr <- unsafeIOToPrim $ withForeignPtr fp $ return a <- case fromIntegral len of 0 -> act nullPtr 0 l -> act (ptr `plusPtr` off) l unsafeIOToPrim $ touchForeignPtr fp return a xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u xdelta x xxcode_input ds = do mstream <- config_stream (xConfig x) either (\e _ -> xOnError x e "config_stream failed") (flip ($)) mstream $ \stream -> do set_source stream "VCDIFF" (xBlockSize x) (xBlockSize x) let go withBlk [] = return mempty go withBlk (d:ds) = do let (fp,off,len) = B.toForeignPtr d eof = null ds when eof $ setFlush (streamArray stream) True withByteString d $ \indata len -> do avail_input (streamArray stream) indata len go2 withBlk eof ds go2 withBlk eof ds = do ret <- withBlk $ xxcode_input stream case ret of XD3_INPUT -> if (not eof) then go withBlk ds else return mempty XD3_OUTPUT -> do m' <- nextOut (streamArray stream) (\(p,len) -> xOutput x p (fromIntegral len)) ms <- xInterleave x $ go2 withBlk eof ds return $ m' <> ms XD3_GETSRCBLK -> do Just n <- requestedBlockNumber stream let blk = xGetSource x n withBlk' act = withByteString blk $ \p len -> do pokeCurrentBlock stream $ CurrentBlock n len p when (len < xBlockSize x) $ do Just src <- readMutVar $ streamSource stream sourceWriteEOFKnown src True act go2 withBlk' eof ds XD3_GOTHEADER -> go2 withBlk eof ds -- No XD3_WINSTART -> go2 withBlk eof ds -- action XD3_WINFINISH -> go2 withBlk eof ds -- neccessary -- -- These are set for each XD3_WINFINISH return. -- xd3_encoder_used_source :: Ptr Stream -> IO Bool -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t e -> do s <- errorString (streamArray stream) xOnError x e s xInterleave x $ go id ds decode_input :: PrimMonad m => Stream m -> m ErrorCode decode_input stream = unsafeIOToPrim $ xd3_decode_input (streamArrayPtr $ streamArray stream) encode_input :: PrimMonad m => Stream m -> m ErrorCode encode_input stream = unsafeIOToPrim $ xd3_encode_input (streamArrayPtr $ streamArray stream) -- | A binary diff (or patch) in the VCDIFF format documented by RFC 3284. -- -- When used as a patch, context is ignored and there is no fuzz. This means -- the file you apply the patch to must have identical contents to the source -- used to create it. /WARNING:/ This wont be checked unless 'XD3_ADLER32' flag -- was specified to 'computeDiff'. newtype VCDIFF = VCDIFF { encodeVCDIFF :: L.ByteString } deriving Show decodeVCDIFF :: L.ByteString -> Either String VCDIFF decodeVCDIFF = Right . VCDIFF chunksOf :: Usize_t -> L.ByteString -> [B.ByteString] chunksOf len bs | L.null bs = [] | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs in L.toStrict b : chunksOf len bs' -- | Compute a binary diff. For most options, use 'defaultConfig', but you may -- want to set 'flags' to 'XD3_ADLER32' to add checksumming safety to the -- patch, and a larger 'chunk_size' may yield greater compression. computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched -- | Apply a patch. It is okay to use 'defaultConfig' for most options, but -- you may want to specify an alternative'chunk_size' for streaming. applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta -- | The result of a computation that may fail. On failure, the 'result' field -- is truncated or invalid. data Result x = Result { result :: x -- ^ A possibly invalid result. To consume a lazy stream with fusion, avoid -- evaluating 'resultError' until this field is fully processed. , resultError :: Maybe (ErrorCode,String) -- ^ If something went wrong while producing 'result', this -- is an error code and message indicating what. } deriving (Show,Functor) instance Monoid x => Monoid (Result x) where mempty = Result mempty Nothing mappend (Result x xe) y = Result (mappend x $ result y) (maybe (resultError y) Just xe) xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> Result L.ByteString xdeltaPure codec cfg source input = let bsize = chunk_size cfg ds = chunksOf bsize input smap = IntMap.fromList $ zip [0..] (chunksOf bsize source) x :: XDeltaMethods (ST s) (Result L.ByteString) x = XDeltaMethods { xConfig = cfg , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of Nothing -> B.empty Just bs -> bs , xOutput = \ptr len -> unsafeIOToST $ flip Result Nothing . L.fromStrict <$> B.packCStringLen (castPtr ptr,len) , xOnError = \e s -> return (Result L.empty (Just (e,s))) , xBlockSize = bsize , xInterleave = unsafeInterleaveST } in runST $ xdelta x codec ds -- | Sensible defaults. All of these configuration items are passed on to the -- xdelta algorithm except 'chunk_size' which is used by 'computeDiff' and -- 'applyPatch' to divide the input into chunks (see 'chunksOf'). -- -- Consider enabling flags = 'XD3_ADLER32' for added safety. defaultConfig :: Config defaultConfig = Config { winsize = XD3_DEFAULT_WINSIZE , sprevsz = XD3_DEFAULT_SPREVSZ , iopt_size = XD3_DEFAULT_IOPT_SIZE , flags = mempty , sec_data = CompressorConfig 0 0 0 , sec_inst = CompressorConfig 0 0 0 , sec_addr = CompressorConfig 0 0 0 , smatch_cfg = Right SMATCH_DEFAULT , chunk_size = 4096 }