{-# LANGUAGE BangPatterns #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NondecreasingIndentation #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE DeriveFunctor #-} module Data.XDelta where import Control.Monad import Control.Monad.Primitive import Control.Monad.ST import Control.Monad.ST.Unsafe import Data.BA import Data.Bits import qualified Data.ByteString as B import qualified Data.ByteString.Unsafe as B import qualified Data.ByteString.Internal as B import qualified Data.ByteString.Lazy as L import Data.Coerce import Data.Int import qualified Data.IntMap as IntMap import Data.Monoid import Data.Primitive.Addr import Data.Primitive.ByteArray import Data.Primitive.MutVar import Data.STRef import qualified Data.Text as T import Data.Text.Encoding import Data.Word import Foreign.C.Types import Foreign.C.String import Foreign.ForeignPtr (withForeignPtr) import Foreign.Ptr import Foreign.Concurrent import Foreign.Storable import Foreign.ForeignPtr (ForeignPtr) import GHC.Exts import XDelta.Types #ifndef SIZEOF_SIZE_T #define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ #define SIZEOF_UNSIGNED_INT __SIZEOF_INT__ #define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__ #define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__ #define static_assert(...) #endif #include data Stream m = Stream { streamArray :: MutableByteArray (PrimState m) , streamPtr :: ForeignPtr Xd3Stream -- ^ HACK: this is only used to attach a finalizer -- to 'streamArray'. Don't use this pointer. -- This would be unnecessary if I could create a -- MutableByteArray with a finalizer attached. , streamSource :: MutVar (PrimState m) (Maybe (MutableByteArray (PrimState m))) } foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Xd3Stream -> Ptr Config -> IO ErrorCode foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Xd3Stream -> IO () foreign import ccall "xdelta3.h xd3_abort_stream" xd3_abort_stream :: Ptr Xd3Stream -> IO () foreign import ccall "xdelta3.h xd3_close_stream" xd3_close_stream :: Ptr Xd3Stream -> IO ErrorCode writeCompressorConfig :: PrimMonad m => MutableByteArray (PrimState m) -> Int -> CompressorConfig -> m () writeCompressorConfig c o sec = do writeAtByte c (o + #{offset xd3_sec_cfg,ngroups}) (ngroups sec) writeAtByte c (o + #{offset xd3_sec_cfg,sector_size}) (sector_size sec) writeAtByte c (o + #{offset xd3_sec_cfg,inefficient}) (inefficient sec) writeMatcher :: PrimMonad m => MutableByteArray (PrimState m) -> Int -> StringMatcher -> m () writeMatcher c o sm = do -- handled elsewhere: const char *name; <- smName :: String writeAtByte c (o + #{offset xd3_smatcher, string_match }) (smStringMatch sm) writeAtByte c (o + #{offset xd3_smatcher, large_look }) (smLargeLook sm) writeAtByte c (o + #{offset xd3_smatcher, large_step }) (smLargeStep sm) writeAtByte c (o + #{offset xd3_smatcher, small_look }) (smSmallLook sm) writeAtByte c (o + #{offset xd3_smatcher, small_chain }) (smSmallChain sm) writeAtByte c (o + #{offset xd3_smatcher, small_lchain }) (smSmallLchain sm) writeAtByte c (o + #{offset xd3_smatcher, max_lazy }) (smMaxLazy sm) writeAtByte c (o + #{offset xd3_smatcher, long_enough }) (smLongEnough sm) ptr :: Addr -> Ptr a ptr (Addr a) = Ptr a adr :: Ptr a -> Addr adr (Ptr a) = Addr a -- The xd3_config structure is used to initialize a stream - all data -- is copied into stream so config may be a temporary variable. See -- the [documentation] or comments on the xd3_config structure. config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m)) config_stream cfg = do let (len,n) = case smatch_cfg cfg of Left m -> let n = encodeUtf8 $ T.pack $ smName m in ( #{const sizeof(xd3_stream)} + B.length n + 1 , n ) Right _ -> ( #{const sizeof(xd3_stream)}, B.empty ) s <- newPinnedByteArray len let sptr = ptr (mutableByteArrayContents s) :: Ptr Xd3Stream fillByteArray s 0 #{const sizeof(xd3_stream)} 0 nptr <- case smatch_cfg cfg of Right _ -> writeStringAt s #{const sizeof(xd3_stream)} n Left _ -> return nullPtr c <- do c <- newPinnedByteArray #const sizeof(xd3_config) fillByteArray c 0 #{const sizeof(xd3_config)} 0 writeAtByte c #{offset xd3_config, winsize} (winsize cfg) writeAtByte c #{offset xd3_config, sprevsz} (sprevsz cfg) writeAtByte c #{offset xd3_config, iopt_size} (iopt_size cfg) writeAtByte c #{offset xd3_config, flags} (coerce (flags cfg) :: Word32) writeCompressorConfig c #{offset xd3_config, sec_data} (sec_data cfg) writeCompressorConfig c #{offset xd3_config, sec_inst} (sec_inst cfg) writeCompressorConfig c #{offset xd3_config, sec_addr} (sec_addr cfg) let msel :: #type xd3_smatch_cfg msel = either (const #{const XD3_SMATCH_SOFT}) (fromIntegral . fromEnum) (smatch_cfg cfg) writeAtByte c #{offset xd3_config, smatch_cfg} msel case smatch_cfg cfg of Right _ -> return () Left matcher -> do let o = #offset xd3_config,smatcher_soft writeAtByte c (o + #{offset xd3_smatcher, name}) nptr writeMatcher c o matcher unsafeFreezeByteArray c let cptr = ptr (byteArrayContents c) :: Ptr Config srcvar <- newMutVar Nothing stream <- unsafeIOToPrim $ do let finalize = do -- freeHaskellFunPtr: aloc,free,getblk xd3_abort_stream sptr xd3_close_stream sptr xd3_free_stream sptr seq s $ return () -- Keep array s alive until the ffi functions finish. fp <- newForeignPtr sptr finalize return Stream { streamArray = s , streamPtr = fp , streamSource = srcvar } unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case XD3_SUCCESS -> return $ c `seq` Right stream ecode -> return $ Left ecode writeStringAt :: PrimMonad m => MutableByteArray s -> Int -> B.ByteString -> m (Ptr a) writeStringAt src o bsname = do (p,cnt) <- unsafeIOToPrim $ B.unsafeUseAsCStringLen bsname return let nptr = ptr (mutableByteArrayContents src) `plusPtr` o copyAddr (adr nptr) (adr p) cnt writeOffAddr (adr nptr) cnt (0 :: Word8) return nptr data Xd3Source foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Xd3Stream -> Ptr Xd3Source -> IO ErrorCode set_source :: PrimMonad m => Stream m -> String -- ^ name for debug/print purposes. -> Usize_t -- ^ block size -> Xoff_t -- ^ maximum visible buffer -> m () set_source stream nm blksz maxwinsz = do let bsname = encodeUtf8 $ T.pack nm src <- newPinnedByteArray $ 1 + B.length bsname + #{const sizeof(xd3_source)} nptr <- writeStringAt src #{const sizeof(xd3_source)} bsname writeAtByte src #{offset xd3_source, blksize } blksz writeAtByte src #{offset xd3_source, name } nptr writeAtByte src #{offset xd3_source, max_winsize} maxwinsz writeAtByte src #{offset xd3_source, curblkno } (maxBound :: Xoff_t) {- writeAtByte (streamArray stream) #{offset xd3_stream, getblk} nullPtr -- xdelta3.h documents this as an internal field. -} let strm = ptr (mutableByteArrayContents $ streamArray stream) srcptr = ptr (mutableByteArrayContents src) unsafeIOToPrim (xd3_set_source strm srcptr) writeMutVar (streamSource stream) (Just src) data XDeltaMethods m u = XDeltaMethods { xConfig :: Config , xGetSource :: Xoff_t -> B.ByteString , xOutput :: Ptr Word8 -> Int -> m u , xOnError :: ErrorCode -> String -> m u , xBlockSize :: Usize_t , xInterleave :: forall a. m a -> m a } -- -- | Checks for legal flag changes. -- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO () setFlag :: PrimMonad m => Flags -> Stream m -> Bool -> m () setFlag b stream wantFlush = do f <- readAtByte (streamArray stream) #{offset xd3_stream, flags} writeAtByte (streamArray stream) #{offset xd3_stream, flags} . (coerce :: Flags -> Word32) $ if wantFlush then Flags f .|. b else Flags f .&. complement b setFlush :: PrimMonad m => Stream m -> Bool -> m () setFlush = setFlag XD3_FLUSH setSkipWindow :: PrimMonad m => Stream m -> Bool -> m () setSkipWindow = setFlag XD3_SKIP_WINDOW -- -- declared static -- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO () avail_input :: PrimMonad m => Stream m -> Ptr a -> Usize_t -> m () avail_input stream p sz = do writeAtByte (streamArray stream) #{offset xd3_stream, next_in} p writeAtByte (streamArray stream) #{offset xd3_stream, avail_in} sz -- | This acknowledges receipt of output data, must be called after any -- XD3_OUTPUT return. -- -- declared static -- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO () nextOut :: PrimMonad m => Stream m -> ((Ptr Word8, Usize_t) -> m a) -> m a nextOut stream action = do buf <- (,) <$> readAtByte (streamArray stream) #{offset xd3_stream, next_out} <*> readAtByte (streamArray stream) #{offset xd3_stream, avail_out} a <- action buf -- unsafeIOToPrim $ xd3_consume_output (ptr $ mutableByteArrayContents $ streamArray stream) writeAtByte (streamArray stream) #{offset xd3_stream, avail_out} (0 :: Usize_t) return a requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t) requestedBlockNumber stream = do msrc <- readMutVar $ streamSource stream forM msrc $ \src -> readAtByte src #offset xd3_source, getblkno data CurrentBlock = CurrentBlock { blkno :: !Xoff_t -- ^ current block number , blkSize :: !Usize_t -- ^ number of bytes on current block: must be >= 0 and <= 'srcBlockSize' , blkPtr :: !(Ptr Word8) -- ^ current block array } -- -- declared static -- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString -- | Gives some extra information about the latest library error, if any -- is known. errorString :: PrimMonad m => Stream m -> m String errorString stream = do cstring <- readAtByte (streamArray stream) #offset xd3_stream, msg if cstring /= nullPtr then unsafeIOToPrim $ peekCString cstring -- TODO: avoid unsafeIOToPrim else return "" pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m () pokeCurrentBlock stream (CurrentBlock no sz ptr) = do msrc <- readMutVar $ streamSource stream forM_ msrc $ \src -> do writeAtByte src #{offset xd3_source, curblkno} no writeAtByte src #{offset xd3_source, onblk} sz writeAtByte src #{offset xd3_source, curblk} ptr withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a withByteString d act = let (fp,off,len) = B.toForeignPtr d in unsafeIOToPrim $ withForeignPtr fp $ \ptr -> unsafePrimToIO $ do act (ptr `plusPtr` off) (fromIntegral len) xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u xdelta x xxcode_input ds = do mstream <- config_stream (xConfig x) either (\e _ -> xOnError x e "config_stream failed") (flip ($)) mstream $ \stream -> do set_source stream "xdelta" (xBlockSize x) (xBlockSize x) let go withBlk [] = return mempty go withBlk (d:ds) = do let (fp,off,len) = B.toForeignPtr d eof = null ds when eof $ setFlush stream True withByteString d $ \indata len -> do avail_input stream indata len go2 withBlk eof ds go2 withBlk eof ds = do ret <- withBlk $ xxcode_input stream case ret of XD3_INPUT -> if (not eof) then go withBlk ds else return mempty XD3_OUTPUT -> do m' <- nextOut stream (\(p,len) -> xOutput x p (fromIntegral len)) ms <- xInterleave x $ go2 withBlk eof ds return $ m' <> ms XD3_GETSRCBLK -> do Just n <- requestedBlockNumber stream let blk = xGetSource x n withBlk' act = withByteString blk $ \p len -> do pokeCurrentBlock stream $ CurrentBlock n len p when (len < xBlockSize x) $ do Just src <- readMutVar $ streamSource stream writeAtByte src #{offset xd3_source, eof_known} (1 :: #{type int}) act go2 withBlk' eof ds XD3_GOTHEADER -> go2 withBlk eof ds -- No XD3_WINSTART -> go2 withBlk eof ds -- action XD3_WINFINISH -> go2 withBlk eof ds -- neccessary -- -- These are set for each XD3_WINFINISH return. -- xd3_encoder_used_source :: Ptr Stream -> IO Bool -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t e -> do s <- errorString stream xOnError x e s xInterleave x $ go id ds foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input :: Ptr Xd3Stream -> IO ErrorCode foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input :: Ptr Xd3Stream -> IO ErrorCode decode_input :: PrimMonad m => Stream m -> m ErrorCode decode_input stream = unsafeIOToPrim $ xd3_decode_input (ptr $ mutableByteArrayContents $ streamArray stream) encode_input :: PrimMonad m => Stream m -> m ErrorCode encode_input stream = unsafeIOToPrim $ xd3_encode_input (ptr $ mutableByteArrayContents $ streamArray stream) newtype XDelta = XDelta L.ByteString deriving Show chunksOf :: Usize_t -> L.ByteString -> [B.ByteString] chunksOf len bs | L.null bs = [] | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs in L.toStrict b : chunksOf len bs' computeDiff :: Config -> L.ByteString -> L.ByteString -> XDeltaFailable XDelta computeDiff cfg source patched = fmap XDelta $ xdeltaPure encode_input cfg source patched applyPatch :: Config -> L.ByteString -> XDelta -> XDeltaFailable L.ByteString applyPatch cfg source (XDelta delta) = xdeltaPure decode_input cfg source delta data XDeltaFailable x = XResult { xresult :: x, xerror :: (Maybe (ErrorCode,String)) } deriving (Show,Functor) instance Monoid x => Monoid (XDeltaFailable x) where mempty = XResult mempty Nothing mappend (XResult x xe) y = XResult (mappend x $ xresult y) (maybe (xerror y) Just xe) xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> XDeltaFailable L.ByteString xdeltaPure codec cfg source delta = let smap = IntMap.fromList $ zip [0..] (chunksOf bsize source) bsize = 4096 x :: XDeltaMethods (ST s) (XDeltaFailable L.ByteString) x = XDeltaMethods { xConfig = cfg , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of Nothing -> B.empty Just bs -> bs , xOutput = \ptr len -> unsafeIOToST $ flip XResult Nothing . L.fromStrict <$> B.packCStringLen (castPtr ptr,len) , xOnError = \e s -> return (XResult L.empty (Just (e,s))) -- :: ErrorCode -> String -> m u , xBlockSize = bsize -- :: Usize_t , xInterleave = unsafeInterleaveST -- :: forall a. m a -> m a } ds = chunksOf bsize delta -- L.toChunks delta in runST $ xdelta x codec ds defaultConfig :: Config defaultConfig = Config { winsize = XD3_DEFAULT_WINSIZE , sprevsz = XD3_DEFAULT_SPREVSZ , iopt_size = XD3_DEFAULT_IOPT_SIZE , flags = mempty , sec_data = CompressorConfig 0 0 0 , sec_inst = CompressorConfig 0 0 0 , sec_addr = CompressorConfig 0 0 0 , smatch_cfg = Right SMATCH_DEFAULT }