From 4aab5a236e578f3cd97566bc142027e06e955f73 Mon Sep 17 00:00:00 2001 From: Joe Crayne Date: Sat, 27 Oct 2018 16:26:10 -0400 Subject: build fix --- haskell/Data/VCDIFF.hs | 255 ++++++++++++++++++++++++++++ haskell/Data/VCDIFF.hsc | 368 ----------------------------------------- haskell/Data/VCDIFF/XDelta.hsc | 202 ++++++++++++++++++++++ haskell/Text/XXD.hs | 48 ------ haskell/examples/Text/XXD.hs | 48 ++++++ xdelta.cabal | 4 +- 6 files changed, 507 insertions(+), 418 deletions(-) create mode 100644 haskell/Data/VCDIFF.hs delete mode 100644 haskell/Data/VCDIFF.hsc create mode 100644 haskell/Data/VCDIFF/XDelta.hsc delete mode 100644 haskell/Text/XXD.hs create mode 100644 haskell/examples/Text/XXD.hs diff --git a/haskell/Data/VCDIFF.hs b/haskell/Data/VCDIFF.hs new file mode 100644 index 0000000..a776052 --- /dev/null +++ b/haskell/Data/VCDIFF.hs @@ -0,0 +1,255 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NondecreasingIndentation #-} +{-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} +module Data.VCDIFF where + +import Control.Monad +import Control.Monad.Primitive +import Control.Monad.ST +import Control.Monad.ST.Unsafe +import Data.Bits +import qualified Data.ByteString as B +import qualified Data.ByteString.Unsafe as B +import qualified Data.ByteString.Internal as B +import qualified Data.ByteString.Lazy as L +import Data.Coerce +import Data.Int +import qualified Data.IntMap as IntMap +import Data.Monoid +import Data.Primitive.Addr +import Data.Primitive.ByteArray +import Data.Primitive.ByteArray.Util +import Data.Primitive.MutVar +import Data.STRef +import qualified Data.Text as T +import Data.Text.Encoding +import Data.Word +import Foreign.C.Types +import Foreign.C.String +import Foreign.ForeignPtr (withForeignPtr) +import Foreign.Ptr +import Foreign.Concurrent +import Foreign.Storable +import Foreign.ForeignPtr (ForeignPtr) +import GHC.Exts +import GHC.TypeLits + +import Data.VCDIFF.Types +import Data.VCDIFF.XDelta + +data Stream m = Stream + { streamArray :: StreamArray m + , streamPtr :: ForeignPtr Xd3Stream -- ^ HACK: this is only used to attach a finalizer + -- to 'streamArray'. Don't use this pointer. + -- This would be unnecessary if I could create a + -- MutableByteArray with a finalizer attached. + , streamSource :: MutVar (PrimState m) (Maybe (Source m)) + } + + +-- The xd3_config structure is used to initialize a stream - all data +-- is copied into stream so config may be a temporary variable. See +-- the [documentation] or comments on the xd3_config structure. +config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m)) +config_stream cfg = do + (s,nptr) <- newStreamArray (either (Just . smName) (const Nothing) (smatch_cfg cfg)) + c <- packConfig nptr cfg + let cptr = ptr (byteArrayContents c) :: Ptr Config + sptr = streamArrayPtr s + srcvar <- newMutVar Nothing + stream <- unsafeIOToPrim $ do + let finalize = do + -- freeHaskellFunPtr: aloc,free,getblk + xd3_abort_stream sptr + xd3_close_stream sptr + xd3_free_stream sptr + seq srcvar $ seq s $ return () -- Keep array s alive until the ffi functions finish. + fp <- newForeignPtr sptr finalize + return Stream + { streamArray = s + , streamPtr = fp + , streamSource = srcvar + } + unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case + XD3_SUCCESS -> return $ c `seq` Right stream + ecode -> return $ Left ecode + + +set_source :: PrimMonad m => + Stream m -> String -- ^ name for debug/print purposes. + -> Usize_t -- ^ block size + -> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size). + -- Rounds up to approx 16k. + -> m () +set_source stream nm blksz maxwinsz = do + src <- newSource nm blksz maxwinsz + {- + writeAtByte (streamArray stream) + #{offset xd3_stream, getblk} + nullPtr -- xdelta3.h documents this as an internal field. + -} + let strm = streamArrayPtr $ streamArray stream + unsafeIOToPrim (xd3_set_source strm $ sourcePtr src) + writeMutVar (streamSource stream) (Just src) + +data XDeltaMethods m u = XDeltaMethods + { xConfig :: Config + , xGetSource :: Xoff_t -> B.ByteString + , xOutput :: Ptr Word8 -> Int -> m u + , xOnError :: ErrorCode -> String -> m u + , xBlockSize :: Usize_t + , xInterleave :: forall a. m a -> m a + } + +-- -- | Checks for legal flag changes. +-- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO () + +-- -- declared static +-- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO () + +-- | This acknowledges receipt of output data, must be called after any +-- XD3_OUTPUT return. +-- -- declared static +-- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO () + +requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t) +requestedBlockNumber stream = do + msrc <- readMutVar $ streamSource stream + forM msrc sourceRequestedBlocknumber + +-- -- declared static +-- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString + +pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m () +pokeCurrentBlock stream blk = do + msrc <- readMutVar $ streamSource stream + forM_ msrc (`sourceWriteCurrentBlock` blk) + +withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a +withByteString d act = + let (fp,off,len) = B.toForeignPtr d + in unsafeIOToPrim $ withForeignPtr fp $ \ptr -> unsafePrimToIO $ do + act (ptr `plusPtr` off) (fromIntegral len) + +xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u +xdelta x xxcode_input ds = do + mstream <- config_stream (xConfig x) + either (\e _ -> xOnError x e "config_stream failed") + (flip ($)) + mstream $ \stream -> do + set_source stream "VCDIFF" (xBlockSize x) (xBlockSize x) + let go withBlk [] = return mempty + go withBlk (d:ds) = do + let (fp,off,len) = B.toForeignPtr d + eof = null ds + when eof $ setFlush (streamArray stream) True + withByteString d $ \indata len -> do + avail_input (streamArray stream) indata len + go2 withBlk eof ds + go2 withBlk eof ds = do + ret <- withBlk $ xxcode_input stream + case ret of + XD3_INPUT -> if (not eof) then go withBlk ds else return mempty + XD3_OUTPUT -> do + m' <- nextOut (streamArray stream) (\(p,len) -> xOutput x p (fromIntegral len)) + ms <- xInterleave x $ go2 withBlk eof ds + return $ m' <> ms + XD3_GETSRCBLK -> do + Just n <- requestedBlockNumber stream + let blk = xGetSource x n + withBlk' act = withByteString blk $ \p len -> do + pokeCurrentBlock stream $ CurrentBlock n len p + when (len < xBlockSize x) $ do + Just src <- readMutVar $ streamSource stream + sourceWriteEOFKnown src True + act + go2 withBlk' eof ds + XD3_GOTHEADER -> go2 withBlk eof ds -- No + XD3_WINSTART -> go2 withBlk eof ds -- action + XD3_WINFINISH -> go2 withBlk eof ds -- neccessary + -- -- These are set for each XD3_WINFINISH return. + -- xd3_encoder_used_source :: Ptr Stream -> IO Bool + -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t + -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t + e -> do + s <- errorString (streamArray stream) + xOnError x e s + xInterleave x $ go id ds + + +decode_input :: PrimMonad m => Stream m -> m ErrorCode +decode_input stream = + unsafeIOToPrim $ xd3_decode_input (streamArrayPtr $ streamArray stream) + +encode_input :: PrimMonad m => Stream m -> m ErrorCode +encode_input stream = + unsafeIOToPrim $ xd3_encode_input (streamArrayPtr $ streamArray stream) + +-- RFC 3284 +newtype VCDIFF = VCDIFF L.ByteString + deriving Show + +chunksOf :: Usize_t -> L.ByteString -> [B.ByteString] +chunksOf len bs | L.null bs = [] + | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs + in L.toStrict b : chunksOf len bs' + +computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF +computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched + +applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString +applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta + +data Result x = Result + { result :: x -- ^ A possibly invalid result. To consume a lazy stream with fusion, avoid + -- evaluating 'resultError' until this field is fully processed. + , resultError :: Maybe (ErrorCode,String) + -- ^ If something went wrong while producing 'result', this + -- is an error code and message indicating what. + } deriving (Show,Functor) + +instance Monoid x => Monoid (Result x) where + mempty = Result mempty Nothing + mappend (Result x xe) y = Result (mappend x $ result y) (maybe (resultError y) Just xe) + +xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> Result L.ByteString +xdeltaPure codec cfg source input = + let bsize = chunk_size cfg + ds = chunksOf bsize input + smap = IntMap.fromList $ zip [0..] (chunksOf bsize source) + x :: XDeltaMethods (ST s) (Result L.ByteString) + x = XDeltaMethods + { xConfig = cfg + , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of + Nothing -> B.empty + Just bs -> bs + , xOutput = \ptr len -> unsafeIOToST $ flip Result Nothing . L.fromStrict + <$> B.packCStringLen (castPtr ptr,len) + , xOnError = \e s -> return (Result L.empty (Just (e,s))) + , xBlockSize = bsize + , xInterleave = unsafeInterleaveST + } + in runST $ xdelta x codec ds + +defaultConfig :: Config +defaultConfig = Config + { winsize = XD3_DEFAULT_WINSIZE + , sprevsz = XD3_DEFAULT_SPREVSZ + , iopt_size = XD3_DEFAULT_IOPT_SIZE + , flags = mempty + , sec_data = CompressorConfig 0 0 0 + , sec_inst = CompressorConfig 0 0 0 + , sec_addr = CompressorConfig 0 0 0 + , smatch_cfg = Right SMATCH_DEFAULT + , chunk_size = 4096 + } diff --git a/haskell/Data/VCDIFF.hsc b/haskell/Data/VCDIFF.hsc deleted file mode 100644 index 804b119..0000000 --- a/haskell/Data/VCDIFF.hsc +++ /dev/null @@ -1,368 +0,0 @@ -{-# OPTIONS_GHC -Wno-partial-type-signatures #-} -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE NondecreasingIndentation #-} -{-# LANGUAGE PartialTypeSignatures #-} -{-# LANGUAGE PatternSynonyms #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TypeOperators #-} -module Data.VCDIFF where - -import Control.Monad -import Control.Monad.Primitive -import Control.Monad.ST -import Control.Monad.ST.Unsafe -import Data.Bits -import qualified Data.ByteString as B -import qualified Data.ByteString.Unsafe as B -import qualified Data.ByteString.Internal as B -import qualified Data.ByteString.Lazy as L -import Data.Coerce -import Data.Int -import qualified Data.IntMap as IntMap -import Data.Monoid -import Data.Primitive.Addr -import Data.Primitive.ByteArray -import Data.Primitive.ByteArray.Util -import Data.Primitive.MutVar -import Data.STRef -import qualified Data.Text as T -import Data.Text.Encoding -import Data.Word -import Foreign.C.Types -import Foreign.C.String -import Foreign.ForeignPtr (withForeignPtr) -import Foreign.Ptr -import Foreign.Concurrent -import Foreign.Storable -import Foreign.ForeignPtr (ForeignPtr) -import GHC.Exts -import GHC.TypeLits - -import Data.VCDIFF.Types -import Data.VCDIFF.XDelta - -#ifndef SIZEOF_SIZE_T -#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ -#define SIZEOF_UNSIGNED_INT __SIZEOF_INT__ -#define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__ -#define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__ -#define static_assert(...) -#endif -#include - -#include "offset.h" - -data Stream m = Stream - { streamArray :: MutableByteArray (PrimState m) - , streamPtr :: ForeignPtr Xd3Stream -- ^ HACK: this is only used to attach a finalizer - -- to 'streamArray'. Don't use this pointer. - -- This would be unnecessary if I could create a - -- MutableByteArray with a finalizer attached. - , streamSource :: MutVar (PrimState m) (Maybe (Source m)) - } - -foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Xd3Stream -> Ptr Config -> IO ErrorCode - -foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Xd3Stream -> IO () - -foreign import ccall "xdelta3.h xd3_abort_stream" xd3_abort_stream :: Ptr Xd3Stream -> IO () - -foreign import ccall "xdelta3.h xd3_close_stream" xd3_close_stream :: Ptr Xd3Stream -> IO ErrorCode - - - -writeCompressorConfig :: (PrimMonad m, _) => MutableByteArray (PrimState m) -> Offset j -> CompressorConfig -> m () -writeCompressorConfig c o sec = do - writeAtByte c (o +. #{off xd3_sec_cfg,ngroups}) (ngroups sec) - writeAtByte c (o +. #{off xd3_sec_cfg,sector_size}) (sector_size sec) - writeAtByte c (o +. #{off xd3_sec_cfg,inefficient}) (inefficient sec) - -writeMatcher :: (PrimMonad m, _) => MutableByteArray (PrimState m) -> Offset j -> StringMatcher -> m () -writeMatcher c o sm = do - -- handled elsewhere: const char *name; <- smName :: String - writeAtByte c (o +. #{off xd3_smatcher, string_match }) (smStringMatch sm) - writeAtByte c (o +. #{off xd3_smatcher, large_look }) (smLargeLook sm) - writeAtByte c (o +. #{off xd3_smatcher, large_step }) (smLargeStep sm) - writeAtByte c (o +. #{off xd3_smatcher, small_look }) (smSmallLook sm) - writeAtByte c (o +. #{off xd3_smatcher, small_chain }) (smSmallChain sm) - writeAtByte c (o +. #{off xd3_smatcher, small_lchain }) (smSmallLchain sm) - writeAtByte c (o +. #{off xd3_smatcher, max_lazy }) (smMaxLazy sm) - writeAtByte c (o +. #{off xd3_smatcher, long_enough }) (smLongEnough sm) - --- The xd3_config structure is used to initialize a stream - all data --- is copied into stream so config may be a temporary variable. See --- the [documentation] or comments on the xd3_config structure. -config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m)) -config_stream cfg = do - let (len,n) = case smatch_cfg cfg of - Left m -> let n = encodeUtf8 $ T.pack $ smName m - in ( #{const sizeof(xd3_stream)} + B.length n + 1 - , n ) - Right _ -> ( #{const sizeof(xd3_stream)}, B.empty ) - s <- newPinnedByteArray len - let sptr = ptr (mutableByteArrayContents s) :: Ptr Xd3Stream - fillByteArray s 0 #{const sizeof(xd3_stream)} 0 - nptr <- case smatch_cfg cfg of - Right _ -> writeStringAt s #{const sizeof(xd3_stream)} n - Left _ -> return nullPtr - c <- do - c <- newPinnedByteArray #const sizeof(xd3_config) - fillByteArray c 0 #{const sizeof(xd3_config)} 0 - writeAtByte c #{off xd3_config, winsize} (winsize cfg) - writeAtByte c #{off xd3_config, sprevsz} (sprevsz cfg) - writeAtByte c #{off xd3_config, iopt_size} (iopt_size cfg) - writeAtByte c #{off xd3_config, flags} (coerce (flags cfg) :: Word32) - writeCompressorConfig c #{off xd3_config, sec_data} (sec_data cfg) - writeCompressorConfig c #{off xd3_config, sec_inst} (sec_inst cfg) - writeCompressorConfig c #{off xd3_config, sec_addr} (sec_addr cfg) - let msel :: #type xd3_smatch_cfg - msel = either (const #{const XD3_SMATCH_SOFT}) - (fromIntegral . fromEnum) - (smatch_cfg cfg) - writeAtByte c (#{off xd3_config, smatch_cfg}) msel - case smatch_cfg cfg of - Right _ -> return () - Left matcher -> do - let o = offset :: Offset #offset xd3_config,smatcher_soft - writeAtByte c (o +. (#{off xd3_smatcher, name})) nptr - writeMatcher c o matcher - unsafeFreezeByteArray c - let cptr = ptr (byteArrayContents c) :: Ptr Config - srcvar <- newMutVar Nothing - stream <- unsafeIOToPrim $ do - let finalize = do - -- freeHaskellFunPtr: aloc,free,getblk - xd3_abort_stream sptr - xd3_close_stream sptr - xd3_free_stream sptr - seq srcvar $ seq s $ return () -- Keep array s alive until the ffi functions finish. - fp <- newForeignPtr sptr finalize - return Stream - { streamArray = s - , streamPtr = fp - , streamSource = srcvar - } - unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case - XD3_SUCCESS -> return $ c `seq` Right stream - ecode -> return $ Left ecode - -foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Xd3Stream -> Ptr Xd3Source -> IO ErrorCode - - -set_source :: PrimMonad m => - Stream m -> String -- ^ name for debug/print purposes. - -> Usize_t -- ^ block size - -> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size). - -- Rounds up to approx 16k. - -> m () -set_source stream nm blksz maxwinsz = do - src <- newSource nm blksz maxwinsz - {- - writeAtByte (streamArray stream) - #{offset xd3_stream, getblk} - nullPtr -- xdelta3.h documents this as an internal field. - -} - let strm = ptr (mutableByteArrayContents $ streamArray stream) - unsafeIOToPrim (xd3_set_source strm $ sourcePtr src) - writeMutVar (streamSource stream) (Just src) - -data XDeltaMethods m u = XDeltaMethods - { xConfig :: Config - , xGetSource :: Xoff_t -> B.ByteString - , xOutput :: Ptr Word8 -> Int -> m u - , xOnError :: ErrorCode -> String -> m u - , xBlockSize :: Usize_t - , xInterleave :: forall a. m a -> m a - } - --- -- | Checks for legal flag changes. --- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO () - -setFlag :: PrimMonad m => Flags -> Stream m -> Bool -> m () -setFlag b stream wantFlush = do - f <- readAtByte (streamArray stream) (#{off xd3_stream, flags}) - writeAtByte (streamArray stream) (#{off xd3_stream, flags}) - . (coerce :: Flags -> Word32) - $ if wantFlush then Flags f .|. b - else Flags f .&. complement b - -setFlush :: PrimMonad m => Stream m -> Bool -> m () -setFlush = setFlag XD3_FLUSH - -setSkipWindow :: PrimMonad m => Stream m -> Bool -> m () -setSkipWindow = setFlag XD3_SKIP_WINDOW - --- -- declared static --- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO () - -avail_input :: PrimMonad m => Stream m -> Ptr a -> Usize_t -> m () -avail_input stream p sz = do - writeAtByte (streamArray stream) (#{off xd3_stream, next_in}) p - writeAtByte (streamArray stream) (#{off xd3_stream, avail_in}) sz - --- | This acknowledges receipt of output data, must be called after any --- XD3_OUTPUT return. --- -- declared static --- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO () - -nextOut :: PrimMonad m => Stream m -> ((Ptr Word8, Usize_t) -> m a) -> m a -nextOut stream action = do - buf <- (,) - <$> readAtByte (streamArray stream) (#{off xd3_stream, next_out}) - <*> readAtByte (streamArray stream) (#{off xd3_stream, avail_out}) - a <- action buf - -- unsafeIOToPrim $ xd3_consume_output (ptr $ mutableByteArrayContents $ streamArray stream) - writeAtByte (streamArray stream) (#{off xd3_stream, avail_out}) (0 :: Usize_t) - return a - -requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t) -requestedBlockNumber stream = do - msrc <- readMutVar $ streamSource stream - forM msrc sourceRequestedBlocknumber - --- -- declared static --- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString - --- | Gives some extra information about the latest library error, if any --- is known. -errorString :: PrimMonad m => Stream m -> m String -errorString stream = do - cstring <- readAtByte (streamArray stream) (#{off xd3_stream, msg}) - if cstring /= nullPtr - then unsafeIOToPrim $ peekCString cstring -- TODO: avoid unsafeIOToPrim - else return "" - -pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m () -pokeCurrentBlock stream blk = do - msrc <- readMutVar $ streamSource stream - forM_ msrc (`sourceWriteCurrentBlock` blk) - -withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a -withByteString d act = - let (fp,off,len) = B.toForeignPtr d - in unsafeIOToPrim $ withForeignPtr fp $ \ptr -> unsafePrimToIO $ do - act (ptr `plusPtr` off) (fromIntegral len) - -xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u -xdelta x xxcode_input ds = do - mstream <- config_stream (xConfig x) - either (\e _ -> xOnError x e "config_stream failed") - (flip ($)) - mstream $ \stream -> do - set_source stream "VCDIFF" (xBlockSize x) (xBlockSize x) - let go withBlk [] = return mempty - go withBlk (d:ds) = do - let (fp,off,len) = B.toForeignPtr d - eof = null ds - when eof $ setFlush stream True - withByteString d $ \indata len -> do - avail_input stream indata len - go2 withBlk eof ds - go2 withBlk eof ds = do - ret <- withBlk $ xxcode_input stream - case ret of - XD3_INPUT -> if (not eof) then go withBlk ds else return mempty - XD3_OUTPUT -> do - m' <- nextOut stream (\(p,len) -> xOutput x p (fromIntegral len)) - ms <- xInterleave x $ go2 withBlk eof ds - return $ m' <> ms - XD3_GETSRCBLK -> do - Just n <- requestedBlockNumber stream - let blk = xGetSource x n - withBlk' act = withByteString blk $ \p len -> do - pokeCurrentBlock stream $ CurrentBlock n len p - when (len < xBlockSize x) $ do - Just src <- readMutVar $ streamSource stream - sourceWriteEOFKnown src True - act - go2 withBlk' eof ds - XD3_GOTHEADER -> go2 withBlk eof ds -- No - XD3_WINSTART -> go2 withBlk eof ds -- action - XD3_WINFINISH -> go2 withBlk eof ds -- neccessary - -- -- These are set for each XD3_WINFINISH return. - -- xd3_encoder_used_source :: Ptr Stream -> IO Bool - -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t - -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t - e -> do - s <- errorString stream - xOnError x e s - xInterleave x $ go id ds - - -foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input :: Ptr Xd3Stream -> IO ErrorCode -foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input :: Ptr Xd3Stream -> IO ErrorCode - -decode_input :: PrimMonad m => Stream m -> m ErrorCode -decode_input stream = - unsafeIOToPrim $ xd3_decode_input (ptr $ mutableByteArrayContents $ streamArray stream) - -encode_input :: PrimMonad m => Stream m -> m ErrorCode -encode_input stream = - unsafeIOToPrim $ xd3_encode_input (ptr $ mutableByteArrayContents $ streamArray stream) - --- RFC 3284 -newtype VCDIFF = VCDIFF L.ByteString - deriving Show - -chunksOf :: Usize_t -> L.ByteString -> [B.ByteString] -chunksOf len bs | L.null bs = [] - | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs - in L.toStrict b : chunksOf len bs' - -computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF -computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched - -applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString -applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta - -data Result x = Result - { result :: x -- ^ A possibly invalid result. To consume a lazy stream with fusion, avoid - -- evaluating 'resultError' until this field is fully processed. - , resultError :: Maybe (ErrorCode,String) - -- ^ If something went wrong while producing 'result', this - -- is an error code and message indicating what. - } deriving (Show,Functor) - -instance Monoid x => Monoid (Result x) where - mempty = Result mempty Nothing - mappend (Result x xe) y = Result (mappend x $ result y) (maybe (resultError y) Just xe) - -xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> Result L.ByteString -xdeltaPure codec cfg source input = - let bsize = chunk_size cfg - ds = chunksOf bsize input - smap = IntMap.fromList $ zip [0..] (chunksOf bsize source) - x :: XDeltaMethods (ST s) (Result L.ByteString) - x = XDeltaMethods - { xConfig = cfg - , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of - Nothing -> B.empty - Just bs -> bs - , xOutput = \ptr len -> unsafeIOToST $ flip Result Nothing . L.fromStrict - <$> B.packCStringLen (castPtr ptr,len) - , xOnError = \e s -> return (Result L.empty (Just (e,s))) - , xBlockSize = bsize - , xInterleave = unsafeInterleaveST - } - in runST $ xdelta x codec ds - -defaultConfig :: Config -defaultConfig = Config - { winsize = XD3_DEFAULT_WINSIZE - , sprevsz = XD3_DEFAULT_SPREVSZ - , iopt_size = XD3_DEFAULT_IOPT_SIZE - , flags = mempty - , sec_data = CompressorConfig 0 0 0 - , sec_inst = CompressorConfig 0 0 0 - , sec_addr = CompressorConfig 0 0 0 - , smatch_cfg = Right SMATCH_DEFAULT - , chunk_size = 4096 - } diff --git a/haskell/Data/VCDIFF/XDelta.hsc b/haskell/Data/VCDIFF/XDelta.hsc new file mode 100644 index 0000000..6e32494 --- /dev/null +++ b/haskell/Data/VCDIFF/XDelta.hsc @@ -0,0 +1,202 @@ +{-# OPTIONS_GHC -Wno-partial-type-signatures #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE PartialTypeSignatures #-} +{-# LANGUAGE TypeFamilies #-} +module Data.VCDIFF.XDelta where + +import Control.Monad +import Control.Monad.Primitive +import Data.Bits +import qualified Data.ByteString as B +import Data.Coerce +import Data.Int +import Data.Primitive.ByteArray +import Data.Primitive.ByteArray.Util +import qualified Data.Text as T +import Data.Text.Encoding +import Data.VCDIFF.Types +import Data.Word +import Foreign.C.Types +import Foreign.C.String +import Foreign.Ptr + +#ifndef SIZEOF_SIZE_T +#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ +#define SIZEOF_UNSIGNED_INT __SIZEOF_INT__ +#define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__ +#define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__ +#define static_assert(...) +#endif +#include + +#include "offset.h" + +type instance SizeOf Usize_t = #const sizeof(usize_t) +type instance SizeOf (FunPtr a) = #const sizeof(void(*)()) +type instance SizeOf (Ptr a) = #const sizeof(void*) +type instance SizeOf #{type int} = #const sizeof(int) +type instance SizeOf #{type unsigned int} = #const sizeof(unsigned int) + + +data Xd3Source + +newtype Source m = Source (MutableByteArray (PrimState m)) + +newSource :: PrimMonad m => + String -- ^ name for debug/print purposes. + -> Usize_t -- ^ block size + -> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size). + -- Rounds up to approx 16k. + -> m (Source m) +newSource nm blksz maxwinsz = do + let bsname = encodeUtf8 $ T.pack nm + src <- newPinnedByteArray $ 1 + B.length bsname + #{const sizeof(xd3_source)} + nptr <- writeStringAt src #{const sizeof(xd3_source)} bsname + writeAtByte src (#{off xd3_source, blksize }) blksz + writeAtByte src (#{off xd3_source, name }) nptr + writeAtByte src (#{off xd3_source, max_winsize}) maxwinsz + writeAtByte src (#{off xd3_source, curblkno }) (maxBound :: Xoff_t) + return $ Source src + +sourcePtr :: Source m -> Ptr Xd3Source +sourcePtr (Source src) = ptr (mutableByteArrayContents src) + +sourceRequestedBlocknumber :: PrimMonad m => Source m -> m Xoff_t +sourceRequestedBlocknumber (Source src) = readAtByte src (#{off xd3_source, getblkno}) + +data CurrentBlock = CurrentBlock + { blkno :: !Xoff_t -- ^ current block number + , blkSize :: !Usize_t -- ^ number of bytes on current block: must be >= 0 and <= 'srcBlockSize' + , blkPtr :: !(Ptr Word8) -- ^ current block array + } + +sourceWriteCurrentBlock :: PrimMonad m => Source m -> CurrentBlock -> m () +sourceWriteCurrentBlock (Source src) (CurrentBlock no sz ptr) = do + writeAtByte src (#{off xd3_source, curblkno}) no + writeAtByte src (#{off xd3_source, onblk}) sz + writeAtByte src (#{off xd3_source, curblk}) ptr + +sourceWriteEOFKnown :: PrimMonad m => Source m -> Bool -> m () +sourceWriteEOFKnown (Source src) False = writeAtByte src (#{off xd3_source, eof_known}) (0 :: #{type int}) +sourceWriteEOFKnown (Source src) True = writeAtByte src (#{off xd3_source, eof_known}) (1 :: #{type int}) + +newtype StreamArray m = StreamArray (MutableByteArray (PrimState m)) + +newStreamArray :: PrimMonad m => + Maybe String -> m (StreamArray m, CString) +newStreamArray mmatcher = do + let (len,n) = case mmatcher of + Just m -> let n = encodeUtf8 $ T.pack m + in ( #{const sizeof(xd3_stream)} + B.length n + 1 + , n ) + Nothing -> ( #{const sizeof(xd3_stream)}, B.empty ) + s <- newPinnedByteArray len + fillByteArray s 0 #{const sizeof(xd3_stream)} 0 + nptr <- case mmatcher of + Nothing -> writeStringAt s #{const sizeof(xd3_stream)} n + Just _ -> return nullPtr + return (StreamArray s,nptr) + +streamArrayPtr :: StreamArray m -> Ptr Xd3Stream +streamArrayPtr (StreamArray s) = ptr (mutableByteArrayContents s) + +setFlag :: PrimMonad m => Flags -> StreamArray m -> Bool -> m () +setFlag b (StreamArray s) wantFlush = do + f <- readAtByte s (#{off xd3_stream, flags}) + writeAtByte s (#{off xd3_stream, flags}) + . (coerce :: Flags -> Word32) + $ if wantFlush then Flags f .|. b + else Flags f .&. complement b + +setFlush :: PrimMonad m => StreamArray m -> Bool -> m () +setFlush = setFlag XD3_FLUSH + +setSkipWindow :: PrimMonad m => StreamArray m -> Bool -> m () +setSkipWindow = setFlag XD3_SKIP_WINDOW + + +avail_input :: PrimMonad m => StreamArray m -> Ptr a -> Usize_t -> m () +avail_input (StreamArray s) p sz = do + writeAtByte s (#{off xd3_stream, next_in}) p + writeAtByte s (#{off xd3_stream, avail_in}) sz + + +nextOut :: PrimMonad m => StreamArray m -> ((Ptr Word8, Usize_t) -> m a) -> m a +nextOut (StreamArray s) action = do + buf <- (,) + <$> readAtByte s (#{off xd3_stream, next_out}) + <*> readAtByte s (#{off xd3_stream, avail_out}) + a <- action buf + -- unsafeIOToPrim $ xd3_consume_output (ptr $ mutableByteArrayContents $ streamArray stream) + writeAtByte s #{off xd3_stream, avail_out} (0 :: Usize_t) + return a + + +-- | Gives some extra information about the latest library error, if any +-- is known. +errorString :: PrimMonad m => StreamArray m -> m String +errorString (StreamArray s) = do + cstring <- readAtByte s (#{off xd3_stream, msg}) + if cstring /= nullPtr + then unsafeIOToPrim $ peekCString cstring -- TODO: avoid unsafeIOToPrim + else return "" + +writeCompressorConfig :: (PrimMonad m, _) => MutableByteArray (PrimState m) -> Offset j -> CompressorConfig -> m () +writeCompressorConfig c o sec = do + writeAtByte c (o +. #{off xd3_sec_cfg,ngroups}) (ngroups sec) + writeAtByte c (o +. #{off xd3_sec_cfg,sector_size}) (sector_size sec) + writeAtByte c (o +. #{off xd3_sec_cfg,inefficient}) (inefficient sec) + +writeMatcher :: (PrimMonad m, _) => MutableByteArray (PrimState m) -> Offset j -> StringMatcher -> m () +writeMatcher c o sm = do + -- handled elsewhere: const char *name; <- smName :: String + writeAtByte c (o +. #{off xd3_smatcher, string_match }) (smStringMatch sm) + writeAtByte c (o +. #{off xd3_smatcher, large_look }) (smLargeLook sm) + writeAtByte c (o +. #{off xd3_smatcher, large_step }) (smLargeStep sm) + writeAtByte c (o +. #{off xd3_smatcher, small_look }) (smSmallLook sm) + writeAtByte c (o +. #{off xd3_smatcher, small_chain }) (smSmallChain sm) + writeAtByte c (o +. #{off xd3_smatcher, small_lchain }) (smSmallLchain sm) + writeAtByte c (o +. #{off xd3_smatcher, max_lazy }) (smMaxLazy sm) + writeAtByte c (o +. #{off xd3_smatcher, long_enough }) (smLongEnough sm) + +packConfig :: PrimMonad m => CString -- ^ Name of software matcher or nullPtr. + -> Config + -> m ByteArray +packConfig nptr cfg = do + c <- newPinnedByteArray #const sizeof(xd3_config) + fillByteArray c 0 #{const sizeof(xd3_config)} 0 + writeAtByte c #{off xd3_config, winsize} (winsize cfg) + writeAtByte c #{off xd3_config, sprevsz} (sprevsz cfg) + writeAtByte c #{off xd3_config, iopt_size} (iopt_size cfg) + writeAtByte c #{off xd3_config, flags} (coerce (flags cfg) :: Word32) + writeCompressorConfig c #{off xd3_config, sec_data} (sec_data cfg) + writeCompressorConfig c #{off xd3_config, sec_inst} (sec_inst cfg) + writeCompressorConfig c #{off xd3_config, sec_addr} (sec_addr cfg) + let msel :: #type xd3_smatch_cfg + msel = either (const #{const XD3_SMATCH_SOFT}) + (fromIntegral . fromEnum) + (smatch_cfg cfg) + writeAtByte c (#{off xd3_config, smatch_cfg}) msel + let mmatcher = either Just (const Nothing) $ smatch_cfg cfg + forM_ mmatcher $ \matcher -> do + let o = #off xd3_config,smatcher_soft + writeAtByte c (o +. (#{off xd3_smatcher, name})) nptr + writeMatcher c o matcher + unsafeFreezeByteArray c + +foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Xd3Stream -> Ptr Config -> IO ErrorCode + +foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Xd3Stream -> IO () + +foreign import ccall "xdelta3.h xd3_abort_stream" xd3_abort_stream :: Ptr Xd3Stream -> IO () + +foreign import ccall "xdelta3.h xd3_close_stream" xd3_close_stream :: Ptr Xd3Stream -> IO ErrorCode + + +foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input :: Ptr Xd3Stream -> IO ErrorCode +foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input :: Ptr Xd3Stream -> IO ErrorCode + + +foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Xd3Stream -> Ptr Xd3Source -> IO ErrorCode + + diff --git a/haskell/Text/XXD.hs b/haskell/Text/XXD.hs deleted file mode 100644 index 77606bf..0000000 --- a/haskell/Text/XXD.hs +++ /dev/null @@ -1,48 +0,0 @@ -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} -module Text.XXD (xxd, xxd2) where - -import Data.ByteArray (ByteArrayAccess) -import qualified Data.ByteArray as BA -import Data.Word -import Data.Bits -import Data.Char -import Text.Printf - -nibble :: Word8 -> Char -nibble b = intToDigit (fromIntegral (b .&. 0x0F)) - -nibbles :: ByteArrayAccess ba => ba -> String -nibbles xs = unwords $ map (\byte -> [nibble (byte `shiftR` 4), nibble byte]) - $ BA.unpack xs - -xxd0 :: (ByteArrayAccess ba) => (forall b. ByteArrayAccess b => b -> String) -> Int -> ba -> [String] -xxd0 tr offset bs | BA.null bs = [] -xxd0 tr offset bs = printf "%03x: %s%s" offset (nibbles xs) (tr xs) - : xxd0 tr (offset + BA.length xs) bs' - where - (xs,bs') = splitAtView 16 bs - -splitAtView :: (ByteArrayAccess ba) => Int -> ba -> (BA.View ba, BA.View ba) -splitAtView n bs = (BA.takeView bs n, BA.dropView bs n) - -xxd :: ByteArrayAccess a => Int -> a -> [String] -xxd = xxd0 (const "") - --- | like xxd, but also shows ascii -xxd2 :: ByteArrayAccess a => Int -> a -> [String] -xxd2 = xxd0 withAscii - -withAscii :: ByteArrayAccess a => a -> [Char] -withAscii row = replicate (50 - 3 * BA.length row) ' ' ++ myunpack row - where - myunpack s = map word8tochar (BA.unpack s) - where word8tochar w | (w .&. 0x80 /= 0) = '.' - word8tochar w = let c = chr (fromIntegral w) - in if isPrint c then c else '.' - -{- -main = do - bs <- B.getContents - mapM_ putStrLn $ xxd2 0 bs - -} diff --git a/haskell/examples/Text/XXD.hs b/haskell/examples/Text/XXD.hs new file mode 100644 index 0000000..77606bf --- /dev/null +++ b/haskell/examples/Text/XXD.hs @@ -0,0 +1,48 @@ +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +module Text.XXD (xxd, xxd2) where + +import Data.ByteArray (ByteArrayAccess) +import qualified Data.ByteArray as BA +import Data.Word +import Data.Bits +import Data.Char +import Text.Printf + +nibble :: Word8 -> Char +nibble b = intToDigit (fromIntegral (b .&. 0x0F)) + +nibbles :: ByteArrayAccess ba => ba -> String +nibbles xs = unwords $ map (\byte -> [nibble (byte `shiftR` 4), nibble byte]) + $ BA.unpack xs + +xxd0 :: (ByteArrayAccess ba) => (forall b. ByteArrayAccess b => b -> String) -> Int -> ba -> [String] +xxd0 tr offset bs | BA.null bs = [] +xxd0 tr offset bs = printf "%03x: %s%s" offset (nibbles xs) (tr xs) + : xxd0 tr (offset + BA.length xs) bs' + where + (xs,bs') = splitAtView 16 bs + +splitAtView :: (ByteArrayAccess ba) => Int -> ba -> (BA.View ba, BA.View ba) +splitAtView n bs = (BA.takeView bs n, BA.dropView bs n) + +xxd :: ByteArrayAccess a => Int -> a -> [String] +xxd = xxd0 (const "") + +-- | like xxd, but also shows ascii +xxd2 :: ByteArrayAccess a => Int -> a -> [String] +xxd2 = xxd0 withAscii + +withAscii :: ByteArrayAccess a => a -> [Char] +withAscii row = replicate (50 - 3 * BA.length row) ' ' ++ myunpack row + where + myunpack s = map word8tochar (BA.unpack s) + where word8tochar w | (w .&. 0x80 /= 0) = '.' + word8tochar w = let c = chr (fromIntegral w) + in if isPrint c then c else '.' + +{- +main = do + bs <- B.getContents + mapM_ putStrLn $ xxd2 0 bs + -} diff --git a/xdelta.cabal b/xdelta.cabal index 6540559..4864d92 100644 --- a/xdelta.cabal +++ b/xdelta.cabal @@ -11,7 +11,7 @@ maintainer: joe@jerkface.net category: Data build-type: Simple -extra-source-files: xdelta3/*.h xdelta3/*.c +extra-source-files: xdelta3/*.h xdelta3/*.c haskell/*.h library exposed-modules: Data.VCDIFF.Types @@ -33,5 +33,5 @@ library executable testdiff main-is: haskell/examples/testdiff.hs other-modules: Text.XXD - hs-source-dirs: haskell examples . + hs-source-dirs: haskell/examples . build-depends: base, bytestring, memory, xdelta -- cgit v1.2.3