From 9eb3ca619568b8c4b3c3d0ed2da0319f1bd9a4bd Mon Sep 17 00:00:00 2001 From: Joe Crayne Date: Fri, 26 Oct 2018 02:17:42 -0400 Subject: Reorganizing. --- examples/testdiff.hs | 2 +- haskell/Data/BA.hs | 17 -- haskell/Data/Primitive/ByteArray/Util.hs | 45 ++++ haskell/Data/VCDIFF.hsc | 406 +++++++++++++++++++++++++++++++ haskell/Data/VCDIFF/Types.hsc | 208 ++++++++++++++++ haskell/Data/XDelta.hsc | 392 ----------------------------- haskell/XDelta/Types.hsc | 208 ---------------- haskell/offset.h | 2 + lazy.hs | 30 +++ xdelta3.cabal | 23 +- 10 files changed, 699 insertions(+), 634 deletions(-) delete mode 100644 haskell/Data/BA.hs create mode 100644 haskell/Data/Primitive/ByteArray/Util.hs create mode 100644 haskell/Data/VCDIFF.hsc create mode 100644 haskell/Data/VCDIFF/Types.hsc delete mode 100644 haskell/Data/XDelta.hsc delete mode 100644 haskell/XDelta/Types.hsc create mode 100644 haskell/offset.h create mode 100644 lazy.hs diff --git a/examples/testdiff.hs b/examples/testdiff.hs index 2847fc3..7e20dc5 100644 --- a/examples/testdiff.hs +++ b/examples/testdiff.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} import qualified Data.ByteString.Lazy as L -import Data.XDelta +import Data.VCDIFF import Text.XXD source :: L.ByteString diff --git a/haskell/Data/BA.hs b/haskell/Data/BA.hs deleted file mode 100644 index 60b1136..0000000 --- a/haskell/Data/BA.hs +++ /dev/null @@ -1,17 +0,0 @@ -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE ScopedTypeVariables #-} -module Data.BA where - -import GHC.Exts -import Control.Monad.Primitive -import Data.Primitive.Types -import Data.Primitive.ByteArray - --- | WARNING: Unsafe to use this on packed C structs. -writeAtByte :: (Prim a, PrimMonad m) => MutableByteArray (PrimState m) -> Int -> a -> m () -writeAtByte buf offset a = writeByteArray buf (div offset $ I# (sizeOf# a)) a -{-# INLINE writeAtByte #-} - -readAtByte :: forall a m. (Prim a, PrimMonad m) => MutableByteArray (PrimState m) -> Int -> m a -readAtByte buf offset = readByteArray buf (div offset $ I# (sizeOf# (undefined :: a))) -{-# INLINE readAtByte #-} diff --git a/haskell/Data/Primitive/ByteArray/Util.hs b/haskell/Data/Primitive/ByteArray/Util.hs new file mode 100644 index 0000000..1776286 --- /dev/null +++ b/haskell/Data/Primitive/ByteArray/Util.hs @@ -0,0 +1,45 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} +module Data.Primitive.ByteArray.Util where + +import GHC.TypeLits +import Control.Monad.Primitive +import Data.Primitive.Types +import Data.Primitive.ByteArray + +newtype Offset (n :: Nat) = Offset Int + +offset :: KnownNat n => Offset n +offset = let k = Offset $ fromIntegral $ natVal k in k + +(+.) :: Offset j -> Offset k -> Offset (j + k) +Offset j +. Offset k = Offset (j + k) + + +type family SizeOf a :: Nat + +class IsMultipleOf (n::Nat) (k::Nat) + +instance n ~ (q * k) => IsMultipleOf n k + +writeAtByte :: ( Prim a + , PrimMonad m + , IsMultipleOf n (SizeOf a) + ) => MutableByteArray (PrimState m) -> Offset n -> a -> m () +writeAtByte buf (Offset offset) a = writeByteArray buf (div offset $ (sizeOf a)) a +{-# INLINE writeAtByte #-} + +readAtByte :: forall a m n. + ( Prim a + , PrimMonad m + , IsMultipleOf n (SizeOf a) + ) => MutableByteArray (PrimState m) -> Offset n -> m a +readAtByte buf (Offset offset) = readByteArray buf (div offset $ (sizeOf (undefined :: a))) +{-# INLINE readAtByte #-} diff --git a/haskell/Data/VCDIFF.hsc b/haskell/Data/VCDIFF.hsc new file mode 100644 index 0000000..5e484e1 --- /dev/null +++ b/haskell/Data/VCDIFF.hsc @@ -0,0 +1,406 @@ +{-# OPTIONS_GHC -Wno-partial-type-signatures #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NondecreasingIndentation #-} +{-# LANGUAGE PartialTypeSignatures #-} +{-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} +module Data.VCDIFF where + +import Control.Monad +import Control.Monad.Primitive +import Control.Monad.ST +import Control.Monad.ST.Unsafe +import Data.Bits +import qualified Data.ByteString as B +import qualified Data.ByteString.Unsafe as B +import qualified Data.ByteString.Internal as B +import qualified Data.ByteString.Lazy as L +import Data.Coerce +import Data.Int +import qualified Data.IntMap as IntMap +import Data.Monoid +import Data.Primitive.Addr +import Data.Primitive.ByteArray +import Data.Primitive.ByteArray.Util +import Data.Primitive.MutVar +import Data.STRef +import qualified Data.Text as T +import Data.Text.Encoding +import Data.Word +import Foreign.C.Types +import Foreign.C.String +import Foreign.ForeignPtr (withForeignPtr) +import Foreign.Ptr +import Foreign.Concurrent +import Foreign.Storable +import Foreign.ForeignPtr (ForeignPtr) +import GHC.Exts +import GHC.TypeLits + +import Data.VCDIFF.Types + +#ifndef SIZEOF_SIZE_T +#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ +#define SIZEOF_UNSIGNED_INT __SIZEOF_INT__ +#define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__ +#define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__ +#define static_assert(...) +#endif +#include + +#include "offset.h" + +data Stream m = Stream + { streamArray :: MutableByteArray (PrimState m) + , streamPtr :: ForeignPtr Xd3Stream -- ^ HACK: this is only used to attach a finalizer + -- to 'streamArray'. Don't use this pointer. + -- This would be unnecessary if I could create a + -- MutableByteArray with a finalizer attached. + , streamSource :: MutVar (PrimState m) (Maybe (MutableByteArray (PrimState m))) + } + +foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Xd3Stream -> Ptr Config -> IO ErrorCode + +foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Xd3Stream -> IO () + +foreign import ccall "xdelta3.h xd3_abort_stream" xd3_abort_stream :: Ptr Xd3Stream -> IO () + +foreign import ccall "xdelta3.h xd3_close_stream" xd3_close_stream :: Ptr Xd3Stream -> IO ErrorCode + +type instance SizeOf Usize_t = #const sizeof(usize_t) +type instance SizeOf (FunPtr a) = #const sizeof(void(*)()) +type instance SizeOf (Ptr a) = #const sizeof(void*) +type instance SizeOf #{type int} = #const sizeof(int) +type instance SizeOf #{type unsigned int} = #const sizeof(unsigned int) + + + +writeCompressorConfig :: (PrimMonad m, _) => MutableByteArray (PrimState m) -> Offset j -> CompressorConfig -> m () +writeCompressorConfig c o sec = do + writeAtByte c (o +. #{off xd3_sec_cfg,ngroups}) (ngroups sec) + writeAtByte c (o +. #{off xd3_sec_cfg,sector_size}) (sector_size sec) + writeAtByte c (o +. #{off xd3_sec_cfg,inefficient}) (inefficient sec) + +writeMatcher :: (PrimMonad m, _) => MutableByteArray (PrimState m) -> Offset j -> StringMatcher -> m () +writeMatcher c o sm = do + -- handled elsewhere: const char *name; <- smName :: String + writeAtByte c (o +. #{off xd3_smatcher, string_match }) (smStringMatch sm) + writeAtByte c (o +. #{off xd3_smatcher, large_look }) (smLargeLook sm) + writeAtByte c (o +. #{off xd3_smatcher, large_step }) (smLargeStep sm) + writeAtByte c (o +. #{off xd3_smatcher, small_look }) (smSmallLook sm) + writeAtByte c (o +. #{off xd3_smatcher, small_chain }) (smSmallChain sm) + writeAtByte c (o +. #{off xd3_smatcher, small_lchain }) (smSmallLchain sm) + writeAtByte c (o +. #{off xd3_smatcher, max_lazy }) (smMaxLazy sm) + writeAtByte c (o +. #{off xd3_smatcher, long_enough }) (smLongEnough sm) + +ptr :: Addr -> Ptr a +ptr (Addr a) = Ptr a + +adr :: Ptr a -> Addr +adr (Ptr a) = Addr a + +-- The xd3_config structure is used to initialize a stream - all data +-- is copied into stream so config may be a temporary variable. See +-- the [documentation] or comments on the xd3_config structure. +config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m)) +config_stream cfg = do + let (len,n) = case smatch_cfg cfg of + Left m -> let n = encodeUtf8 $ T.pack $ smName m + in ( #{const sizeof(xd3_stream)} + B.length n + 1 + , n ) + Right _ -> ( #{const sizeof(xd3_stream)}, B.empty ) + s <- newPinnedByteArray len + let sptr = ptr (mutableByteArrayContents s) :: Ptr Xd3Stream + fillByteArray s 0 #{const sizeof(xd3_stream)} 0 + nptr <- case smatch_cfg cfg of + Right _ -> writeStringAt s #{const sizeof(xd3_stream)} n + Left _ -> return nullPtr + c <- do + c <- newPinnedByteArray #const sizeof(xd3_config) + fillByteArray c 0 #{const sizeof(xd3_config)} 0 + writeAtByte c #{off xd3_config, winsize} (winsize cfg) + writeAtByte c #{off xd3_config, sprevsz} (sprevsz cfg) + writeAtByte c #{off xd3_config, iopt_size} (iopt_size cfg) + writeAtByte c #{off xd3_config, flags} (coerce (flags cfg) :: Word32) + writeCompressorConfig c #{off xd3_config, sec_data} (sec_data cfg) + writeCompressorConfig c #{off xd3_config, sec_inst} (sec_inst cfg) + writeCompressorConfig c #{off xd3_config, sec_addr} (sec_addr cfg) + let msel :: #type xd3_smatch_cfg + msel = either (const #{const XD3_SMATCH_SOFT}) + (fromIntegral . fromEnum) + (smatch_cfg cfg) + writeAtByte c (#{off xd3_config, smatch_cfg}) msel + case smatch_cfg cfg of + Right _ -> return () + Left matcher -> do + let o = offset :: Offset #offset xd3_config,smatcher_soft + writeAtByte c (o +. (#{off xd3_smatcher, name})) nptr + writeMatcher c o matcher + unsafeFreezeByteArray c + let cptr = ptr (byteArrayContents c) :: Ptr Config + srcvar <- newMutVar Nothing + stream <- unsafeIOToPrim $ do + let finalize = do + -- freeHaskellFunPtr: aloc,free,getblk + xd3_abort_stream sptr + xd3_close_stream sptr + xd3_free_stream sptr + seq srcvar $ seq s $ return () -- Keep array s alive until the ffi functions finish. + fp <- newForeignPtr sptr finalize + return Stream + { streamArray = s + , streamPtr = fp + , streamSource = srcvar + } + unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case + XD3_SUCCESS -> return $ c `seq` Right stream + ecode -> return $ Left ecode + +writeStringAt :: PrimMonad m => MutableByteArray s -> Int -> B.ByteString -> m (Ptr a) +writeStringAt src o bsname = do + (p,cnt) <- unsafeIOToPrim $ B.unsafeUseAsCStringLen bsname return + let nptr = ptr (mutableByteArrayContents src) `plusPtr` o + copyAddr (adr nptr) (adr p) cnt + writeOffAddr (adr nptr) cnt (0 :: Word8) + return nptr + +data Xd3Source + +foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Xd3Stream -> Ptr Xd3Source -> IO ErrorCode + +set_source :: PrimMonad m => + Stream m -> String -- ^ name for debug/print purposes. + -> Usize_t -- ^ block size + -> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size). + -- Rounds up to approx 16k. + -> m () +set_source stream nm blksz maxwinsz = do + let bsname = encodeUtf8 $ T.pack nm + src <- newPinnedByteArray $ 1 + B.length bsname + #{const sizeof(xd3_source)} + nptr <- writeStringAt src #{const sizeof(xd3_source)} bsname + writeAtByte src (#{off xd3_source, blksize }) blksz + writeAtByte src (#{off xd3_source, name }) nptr + writeAtByte src (#{off xd3_source, max_winsize}) maxwinsz + writeAtByte src (#{off xd3_source, curblkno }) (maxBound :: Xoff_t) + {- + writeAtByte (streamArray stream) + #{offset xd3_stream, getblk} + nullPtr -- xdelta3.h documents this as an internal field. + -} + let strm = ptr (mutableByteArrayContents $ streamArray stream) + srcptr = ptr (mutableByteArrayContents src) + unsafeIOToPrim (xd3_set_source strm srcptr) + writeMutVar (streamSource stream) (Just src) + +data XDeltaMethods m u = XDeltaMethods + { xConfig :: Config + , xGetSource :: Xoff_t -> B.ByteString + , xOutput :: Ptr Word8 -> Int -> m u + , xOnError :: ErrorCode -> String -> m u + , xBlockSize :: Usize_t + , xInterleave :: forall a. m a -> m a + } + +-- -- | Checks for legal flag changes. +-- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO () + +setFlag :: PrimMonad m => Flags -> Stream m -> Bool -> m () +setFlag b stream wantFlush = do + f <- readAtByte (streamArray stream) (#{off xd3_stream, flags}) + writeAtByte (streamArray stream) (#{off xd3_stream, flags}) + . (coerce :: Flags -> Word32) + $ if wantFlush then Flags f .|. b + else Flags f .&. complement b + +setFlush :: PrimMonad m => Stream m -> Bool -> m () +setFlush = setFlag XD3_FLUSH + +setSkipWindow :: PrimMonad m => Stream m -> Bool -> m () +setSkipWindow = setFlag XD3_SKIP_WINDOW + +-- -- declared static +-- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO () + +avail_input :: PrimMonad m => Stream m -> Ptr a -> Usize_t -> m () +avail_input stream p sz = do + writeAtByte (streamArray stream) (#{off xd3_stream, next_in}) p + writeAtByte (streamArray stream) (#{off xd3_stream, avail_in}) sz + +-- | This acknowledges receipt of output data, must be called after any +-- XD3_OUTPUT return. +-- -- declared static +-- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO () + +nextOut :: PrimMonad m => Stream m -> ((Ptr Word8, Usize_t) -> m a) -> m a +nextOut stream action = do + buf <- (,) + <$> readAtByte (streamArray stream) (#{off xd3_stream, next_out}) + <*> readAtByte (streamArray stream) (#{off xd3_stream, avail_out}) + a <- action buf + -- unsafeIOToPrim $ xd3_consume_output (ptr $ mutableByteArrayContents $ streamArray stream) + writeAtByte (streamArray stream) (#{off xd3_stream, avail_out}) (0 :: Usize_t) + return a + + +requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t) +requestedBlockNumber stream = do + msrc <- readMutVar $ streamSource stream + forM msrc $ \src -> readAtByte src (#{off xd3_source, getblkno}) + +data CurrentBlock = CurrentBlock + { blkno :: !Xoff_t -- ^ current block number + , blkSize :: !Usize_t -- ^ number of bytes on current block: must be >= 0 and <= 'srcBlockSize' + , blkPtr :: !(Ptr Word8) -- ^ current block array + } + +-- -- declared static +-- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString + +-- | Gives some extra information about the latest library error, if any +-- is known. +errorString :: PrimMonad m => Stream m -> m String +errorString stream = do + cstring <- readAtByte (streamArray stream) (#{off xd3_stream, msg}) + if cstring /= nullPtr + then unsafeIOToPrim $ peekCString cstring -- TODO: avoid unsafeIOToPrim + else return "" + +pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m () +pokeCurrentBlock stream (CurrentBlock no sz ptr) = do + msrc <- readMutVar $ streamSource stream + forM_ msrc $ \src -> do + writeAtByte src (#{off xd3_source, curblkno}) no + writeAtByte src (#{off xd3_source, onblk}) sz + writeAtByte src (#{off xd3_source, curblk}) ptr + + +withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a +withByteString d act = + let (fp,off,len) = B.toForeignPtr d + in unsafeIOToPrim $ withForeignPtr fp $ \ptr -> unsafePrimToIO $ do + act (ptr `plusPtr` off) (fromIntegral len) + +xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u +xdelta x xxcode_input ds = do + mstream <- config_stream (xConfig x) + either (\e _ -> xOnError x e "config_stream failed") + (flip ($)) + mstream $ \stream -> do + set_source stream "VCDIFF" (xBlockSize x) (xBlockSize x) + let go withBlk [] = return mempty + go withBlk (d:ds) = do + let (fp,off,len) = B.toForeignPtr d + eof = null ds + when eof $ setFlush stream True + withByteString d $ \indata len -> do + avail_input stream indata len + go2 withBlk eof ds + go2 withBlk eof ds = do + ret <- withBlk $ xxcode_input stream + case ret of + XD3_INPUT -> if (not eof) then go withBlk ds else return mempty + XD3_OUTPUT -> do + m' <- nextOut stream (\(p,len) -> xOutput x p (fromIntegral len)) + ms <- xInterleave x $ go2 withBlk eof ds + return $ m' <> ms + XD3_GETSRCBLK -> do + Just n <- requestedBlockNumber stream + let blk = xGetSource x n + withBlk' act = withByteString blk $ \p len -> do + pokeCurrentBlock stream $ CurrentBlock n len p + when (len < xBlockSize x) $ do + Just src <- readMutVar $ streamSource stream + writeAtByte src (#{off xd3_source, eof_known}) (1 :: #{type int}) + act + go2 withBlk' eof ds + XD3_GOTHEADER -> go2 withBlk eof ds -- No + XD3_WINSTART -> go2 withBlk eof ds -- action + XD3_WINFINISH -> go2 withBlk eof ds -- neccessary + -- -- These are set for each XD3_WINFINISH return. + -- xd3_encoder_used_source :: Ptr Stream -> IO Bool + -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t + -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t + e -> do + s <- errorString stream + xOnError x e s + xInterleave x $ go id ds + + +foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input :: Ptr Xd3Stream -> IO ErrorCode +foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input :: Ptr Xd3Stream -> IO ErrorCode + +decode_input :: PrimMonad m => Stream m -> m ErrorCode +decode_input stream = + unsafeIOToPrim $ xd3_decode_input (ptr $ mutableByteArrayContents $ streamArray stream) + +encode_input :: PrimMonad m => Stream m -> m ErrorCode +encode_input stream = + unsafeIOToPrim $ xd3_encode_input (ptr $ mutableByteArrayContents $ streamArray stream) + +-- RFC 3284 +newtype VCDIFF = VCDIFF L.ByteString + deriving Show + +chunksOf :: Usize_t -> L.ByteString -> [B.ByteString] +chunksOf len bs | L.null bs = [] + | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs + in L.toStrict b : chunksOf len bs' + +computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF +computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched + +applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString +applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta + +data Result x = Result + { result :: x -- ^ A possibly invalid result. To consume a lazy stream with fusion, avoid + -- evaluating 'resultError' until this field is fully processed. + , resultError :: Maybe (ErrorCode,String) + -- ^ If something went wrong while producing 'result', this + -- is an error code and message indicating what. + } deriving (Show,Functor) + +instance Monoid x => Monoid (Result x) where + mempty = Result mempty Nothing + mappend (Result x xe) y = Result (mappend x $ result y) (maybe (resultError y) Just xe) + +xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> Result L.ByteString +xdeltaPure codec cfg source input = + let bsize = chunk_size cfg + ds = chunksOf bsize input + smap = IntMap.fromList $ zip [0..] (chunksOf bsize source) + x :: XDeltaMethods (ST s) (Result L.ByteString) + x = XDeltaMethods + { xConfig = cfg + , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of + Nothing -> B.empty + Just bs -> bs + , xOutput = \ptr len -> unsafeIOToST $ flip Result Nothing . L.fromStrict + <$> B.packCStringLen (castPtr ptr,len) + , xOnError = \e s -> return (Result L.empty (Just (e,s))) + , xBlockSize = bsize + , xInterleave = unsafeInterleaveST + } + in runST $ xdelta x codec ds + +defaultConfig :: Config +defaultConfig = Config + { winsize = XD3_DEFAULT_WINSIZE + , sprevsz = XD3_DEFAULT_SPREVSZ + , iopt_size = XD3_DEFAULT_IOPT_SIZE + , flags = mempty + , sec_data = CompressorConfig 0 0 0 + , sec_inst = CompressorConfig 0 0 0 + , sec_addr = CompressorConfig 0 0 0 + , smatch_cfg = Right SMATCH_DEFAULT + , chunk_size = 4096 + } diff --git a/haskell/Data/VCDIFF/Types.hsc b/haskell/Data/VCDIFF/Types.hsc new file mode 100644 index 0000000..015f406 --- /dev/null +++ b/haskell/Data/VCDIFF/Types.hsc @@ -0,0 +1,208 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternSynonyms #-} +module Data.VCDIFF.Types where + +import Control.Exception +import Control.Monad +import Data.Bits +import qualified Data.ByteString as B +import qualified Data.ByteString.Internal as B +import Data.Function +import Data.Int +import Data.Monoid +import Data.Primitive.ByteArray +import qualified Data.Text as T +import Data.Text.Encoding +import Data.Word +import Foreign.C.String +import Foreign.C.Types +import Foreign.ForeignPtr +import Foreign.Marshal.Alloc +import Foreign.Marshal.Utils +import Foreign.Ptr +import Foreign.Storable +import System.IO +import System.IO.Error +import System.IO.Unsafe + + + +#ifndef SIZEOF_SIZE_T +#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ +#define SIZEOF_UNSIGNED_INT __SIZEOF_INT__ +#define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__ +#define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__ +#define static_assert(...) +#endif +#include + +type Usize_t = #type usize_t +-- | Printf code for type Usize_t +pattern W :: String +pattern W = #const_str W "" + +type Xoff_t = #type xoff_t +-- | Printf code for type Xoff_t +pattern Q :: String +pattern Q = #const_str Q "" + + +-- | These are the five ordinary status codes returned by the +-- xd3_encode_input() and xd3_decode_input() state machines. +-- +-- An application must be prepared to handle these five return +-- values from either xd3_encode_input or xd3_decode_input except +-- in the case of no-source compression in which case XD3_GETSRCBLK +-- is never returned. More detailed comments for these are given in +-- xd3_encode_input and xd3_decode_input comments below. +newtype ErrorCode = ErrorCode CInt + deriving Show + +pattern XD3_SUCCESS = ErrorCode 0 + +-- | need input +pattern XD3_INPUT = ErrorCode (#const XD3_INPUT) + +-- | have output +pattern XD3_OUTPUT = ErrorCode (#const XD3_OUTPUT) + +-- | need a block of source input (with no xd3_getblk function) a chance to do non-blocking read. +pattern XD3_GETSRCBLK = ErrorCode (#const XD3_GETSRCBLK) + +-- | (decode-only) after the initial VCDIFF & first window header +pattern XD3_GOTHEADER = ErrorCode (#const XD3_GOTHEADER) + +-- | notification: returned before a window is processed giving a chance to XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that window. +pattern XD3_WINSTART = ErrorCode (#const XD3_WINSTART) + +-- | notification: returned after encode/decode & output for a window +pattern XD3_WINFINISH = ErrorCode (#const XD3_WINFINISH) + +-- | (encoder only) may be returned by getblk() if the block is too old +pattern XD3_TOOFARBACK = ErrorCode (#const XD3_TOOFARBACK) + +-- | internal error +pattern XD3_INTERNAL = ErrorCode (#const XD3_INTERNAL) + +-- | invalid config +pattern XD3_INVALID = ErrorCode (#const XD3_INVALID) + +-- | invalid input/decoder error +pattern XD3_INVALID_INPUT = ErrorCode (#const XD3_INVALID_INPUT) + +-- | when secondary compression finds no improvement. +pattern XD3_NOSECOND = ErrorCode (#const XD3_NOSECOND) + +-- | currently VCD_TARGET VCD_CODETABLE +pattern XD3_UNIMPLEMENTED = ErrorCode (#const XD3_UNIMPLEMENTED) + +instance Exception ErrorCode + +data Config = Config + { winsize :: Usize_t -- ^ The encoder window size. + -- The encoder allocates a buffer of this size if the + -- program supplies input in smaller units (unless the + -- XD3_FLUSH flag is set). + , sprevsz :: Usize_t -- ^ How far back small string matching goes + , iopt_size :: Usize_t -- ^ entries in the instruction-optimizing buffer + , flags :: Flags -- ^ stream->flags are initialized from xd3_config & never modified by the library. Use xd3_set_flags to modify flags settings mid-stream. + , sec_data :: CompressorConfig -- ^ Secondary compressor config: data + , sec_inst :: CompressorConfig -- ^ Secondary compressor config: inst + , sec_addr :: CompressorConfig -- ^ Secondary compressor config: addr + , smatch_cfg :: Either StringMatcher SMatchSelect -- ^ See enum: use fields below for soft config + , chunk_size :: Usize_t -- ^ Suggested chunking size for streaming. + } + +pattern XD3_DEFAULT_WINSIZE = #const XD3_DEFAULT_WINSIZE +pattern XD3_DEFAULT_SPREVSZ = #const XD3_DEFAULT_SPREVSZ +pattern XD3_DEFAULT_IOPT_SIZE = #const XD3_DEFAULT_IOPT_SIZE + +newtype Flags = Flags Word32 + deriving (Storable,Eq,Bits,FiniteBits) + +-- used by VCDIFF tools, see xdelta3-main.h.--/ +pattern XD3_JUST_HDR = Flags (#const XD3_JUST_HDR) +-- used by VCDIFF tools see xdelta3-main.h.--/ +pattern XD3_SKIP_WINDOW = Flags (#const XD3_SKIP_WINDOW) +-- | used by VCDIFF tools, see xdelta3-main.h. */ +pattern XD3_SKIP_EMIT = Flags (#const XD3_SKIP_EMIT) +-- | flush the stream buffer to prepare for xd3_stream_close(). */ +pattern XD3_FLUSH = Flags (#const XD3_FLUSH) +-- | use DJW static huffman */ +pattern XD3_SEC_DJW = Flags (#const XD3_SEC_DJW) +-- | use FGK adaptive huffman */ +pattern XD3_SEC_FGK = Flags (#const XD3_SEC_FGK) +-- | use LZMA secondary */ +pattern XD3_SEC_LZMA = Flags (#const XD3_SEC_LZMA) +pattern XD3_SEC_TYPE = Flags (#const XD3_SEC_TYPE) +-- | disable secondary compression of the data section. */ +pattern XD3_SEC_NODATA = Flags (#const XD3_SEC_NODATA) +-- | disable secondary compression of the inst section. */ +pattern XD3_SEC_NOINST = Flags (#const XD3_SEC_NOINST) +-- | disable secondary compression of the addr section. */ +pattern XD3_SEC_NOADDR = Flags (#const XD3_SEC_NOADDR) +pattern XD3_SEC_NOALL = Flags (#const XD3_SEC_NOALL) +-- | enable checksum computation in the encoder. */ +pattern XD3_ADLER32 = Flags (#const XD3_ADLER32) +-- | disable checksum verification in the decoder. */ +pattern XD3_ADLER32_NOVER = Flags (#const XD3_ADLER32_NOVER) +-- | disable ordinary data * compression feature, only search * the source, not the target. */ +pattern XD3_NOCOMPRESS = Flags (#const XD3_NOCOMPRESS) +-- | disable the "1.5-pass * algorithm", instead use greedy * matching. Greedy is off by * default. */ +pattern XD3_BEGREEDY = Flags (#const XD3_BEGREEDY) +-- | used by "recode". */ +pattern XD3_ADLER32_RECODE = Flags (#const XD3_ADLER32_RECODE) +-- 4 bits to set the compression level the same as the command-line +-- setting -1 through -9 Flags (-0 corresponds to the XD3_NOCOMPRESS flag +-- and is independent of compression level). This is for +-- convenience especially with xd3_encode_memoryFlags (). */ +pattern XD3_COMPLEVEL_SHIFT = #const XD3_COMPLEVEL_SHIFT +pattern XD3_COMPLEVEL_MASK = Flags (#const XD3_COMPLEVEL_MASK) +pattern XD3_COMPLEVEL_1 = Flags (#const XD3_COMPLEVEL_1) +pattern XD3_COMPLEVEL_2 = Flags (#const XD3_COMPLEVEL_2) +pattern XD3_COMPLEVEL_3 = Flags (#const XD3_COMPLEVEL_3) +pattern XD3_COMPLEVEL_6 = Flags (#const XD3_COMPLEVEL_6) +pattern XD3_COMPLEVEL_9 = Flags (#const XD3_COMPLEVEL_9) + +instance Monoid Flags where + mempty = Flags 0 + Flags a `mappend` Flags b = Flags (a .|. b) + +-- | Settings for the secondary compressor. +data CompressorConfig = CompressorConfig + { ngroups :: Usize_t -- ^ Number of DJW Huffman groups. + , sector_size :: Usize_t -- ^ Sector size. + , inefficient :: #{type int} -- ^ If true, ignore efficiency check [avoid XD3_NOSECOND]. + } + +-- | The values of this enumeration are set in xd3_config using the +-- 'smatch_cfg' variable. It can be set to default, slow, fast, etc., +-- and soft. +data SMatchSelect + = SMATCH_DEFAULT -- ^ Flags may contain XD3_COMPLEVEL bits, else default. + | SMATCH_SLOW + | SMATCH_FAST + | SMATCH_FASTER + | SMATCH_FASTEST + deriving Enum + +-- | This type exists only to be a tag for Ptr to an underlying C-struct called +-- xd3_stream. +data Xd3Stream + +-- | This is the record of a pre-compiled configuration, a subset of +-- xd3_config. (struct _xd3_smatcher) +data StringMatcher = StringMatcher + { smName :: String + , smStringMatch :: FunPtr (Ptr Xd3Stream -> ErrorCode) + , smLargeLook :: Usize_t + , smLargeStep :: Usize_t + , smSmallLook :: Usize_t + , smSmallChain :: Usize_t + , smSmallLchain :: Usize_t + , smMaxLazy :: Usize_t + , smLongEnough :: Usize_t + } diff --git a/haskell/Data/XDelta.hsc b/haskell/Data/XDelta.hsc deleted file mode 100644 index 8128a61..0000000 --- a/haskell/Data/XDelta.hsc +++ /dev/null @@ -1,392 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE NondecreasingIndentation #-} -{-# LANGUAGE PatternSynonyms #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE DeriveFunctor #-} -module Data.XDelta where - -import Control.Monad -import Control.Monad.Primitive -import Control.Monad.ST -import Control.Monad.ST.Unsafe -import Data.BA -import Data.Bits -import qualified Data.ByteString as B -import qualified Data.ByteString.Unsafe as B -import qualified Data.ByteString.Internal as B -import qualified Data.ByteString.Lazy as L -import Data.Coerce -import Data.Int -import qualified Data.IntMap as IntMap -import Data.Monoid -import Data.Primitive.Addr -import Data.Primitive.ByteArray -import Data.Primitive.MutVar -import Data.STRef -import qualified Data.Text as T -import Data.Text.Encoding -import Data.Word -import Foreign.C.Types -import Foreign.C.String -import Foreign.ForeignPtr (withForeignPtr) -import Foreign.Ptr -import Foreign.Concurrent -import Foreign.Storable -import Foreign.ForeignPtr (ForeignPtr) -import GHC.Exts - -import XDelta.Types - -#ifndef SIZEOF_SIZE_T -#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ -#define SIZEOF_UNSIGNED_INT __SIZEOF_INT__ -#define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__ -#define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__ -#define static_assert(...) -#endif -#include - -data Stream m = Stream - { streamArray :: MutableByteArray (PrimState m) - , streamPtr :: ForeignPtr Xd3Stream -- ^ HACK: this is only used to attach a finalizer - -- to 'streamArray'. Don't use this pointer. - -- This would be unnecessary if I could create a - -- MutableByteArray with a finalizer attached. - , streamSource :: MutVar (PrimState m) (Maybe (MutableByteArray (PrimState m))) - } - -foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Xd3Stream -> Ptr Config -> IO ErrorCode - -foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Xd3Stream -> IO () - -foreign import ccall "xdelta3.h xd3_abort_stream" xd3_abort_stream :: Ptr Xd3Stream -> IO () - -foreign import ccall "xdelta3.h xd3_close_stream" xd3_close_stream :: Ptr Xd3Stream -> IO ErrorCode - - -writeCompressorConfig :: PrimMonad m => - MutableByteArray (PrimState m) -> Int -> CompressorConfig -> m () -writeCompressorConfig c o sec = do - writeAtByte c (o + #{offset xd3_sec_cfg,ngroups}) (ngroups sec) - writeAtByte c (o + #{offset xd3_sec_cfg,sector_size}) (sector_size sec) - writeAtByte c (o + #{offset xd3_sec_cfg,inefficient}) (inefficient sec) - -writeMatcher :: PrimMonad m => - MutableByteArray (PrimState m) -> Int -> StringMatcher -> m () -writeMatcher c o sm = do - -- handled elsewhere: const char *name; <- smName :: String - writeAtByte c (o + #{offset xd3_smatcher, string_match }) (smStringMatch sm) - writeAtByte c (o + #{offset xd3_smatcher, large_look }) (smLargeLook sm) - writeAtByte c (o + #{offset xd3_smatcher, large_step }) (smLargeStep sm) - writeAtByte c (o + #{offset xd3_smatcher, small_look }) (smSmallLook sm) - writeAtByte c (o + #{offset xd3_smatcher, small_chain }) (smSmallChain sm) - writeAtByte c (o + #{offset xd3_smatcher, small_lchain }) (smSmallLchain sm) - writeAtByte c (o + #{offset xd3_smatcher, max_lazy }) (smMaxLazy sm) - writeAtByte c (o + #{offset xd3_smatcher, long_enough }) (smLongEnough sm) - -ptr :: Addr -> Ptr a -ptr (Addr a) = Ptr a - -adr :: Ptr a -> Addr -adr (Ptr a) = Addr a - --- The xd3_config structure is used to initialize a stream - all data --- is copied into stream so config may be a temporary variable. See --- the [documentation] or comments on the xd3_config structure. -config_stream :: PrimMonad m => Config -> m (Either ErrorCode (Stream m)) -config_stream cfg = do - let (len,n) = case smatch_cfg cfg of - Left m -> let n = encodeUtf8 $ T.pack $ smName m - in ( #{const sizeof(xd3_stream)} + B.length n + 1 - , n ) - Right _ -> ( #{const sizeof(xd3_stream)}, B.empty ) - s <- newPinnedByteArray len - let sptr = ptr (mutableByteArrayContents s) :: Ptr Xd3Stream - fillByteArray s 0 #{const sizeof(xd3_stream)} 0 - nptr <- case smatch_cfg cfg of - Right _ -> writeStringAt s #{const sizeof(xd3_stream)} n - Left _ -> return nullPtr - c <- do - c <- newPinnedByteArray #const sizeof(xd3_config) - fillByteArray c 0 #{const sizeof(xd3_config)} 0 - writeAtByte c #{offset xd3_config, winsize} (winsize cfg) - writeAtByte c #{offset xd3_config, sprevsz} (sprevsz cfg) - writeAtByte c #{offset xd3_config, iopt_size} (iopt_size cfg) - writeAtByte c #{offset xd3_config, flags} (coerce (flags cfg) :: Word32) - writeCompressorConfig c #{offset xd3_config, sec_data} (sec_data cfg) - writeCompressorConfig c #{offset xd3_config, sec_inst} (sec_inst cfg) - writeCompressorConfig c #{offset xd3_config, sec_addr} (sec_addr cfg) - let msel :: #type xd3_smatch_cfg - msel = either (const #{const XD3_SMATCH_SOFT}) - (fromIntegral . fromEnum) - (smatch_cfg cfg) - writeAtByte c #{offset xd3_config, smatch_cfg} msel - case smatch_cfg cfg of - Right _ -> return () - Left matcher -> do - let o = #offset xd3_config,smatcher_soft - writeAtByte c (o + #{offset xd3_smatcher, name}) nptr - writeMatcher c o matcher - unsafeFreezeByteArray c - let cptr = ptr (byteArrayContents c) :: Ptr Config - srcvar <- newMutVar Nothing - stream <- unsafeIOToPrim $ do - let finalize = do - -- freeHaskellFunPtr: aloc,free,getblk - xd3_abort_stream sptr - xd3_close_stream sptr - xd3_free_stream sptr - seq srcvar $ seq s $ return () -- Keep array s alive until the ffi functions finish. - fp <- newForeignPtr sptr finalize - return Stream - { streamArray = s - , streamPtr = fp - , streamSource = srcvar - } - unsafeIOToPrim (xd3_config_stream sptr cptr) >>= \case - XD3_SUCCESS -> return $ c `seq` Right stream - ecode -> return $ Left ecode - -writeStringAt :: PrimMonad m => MutableByteArray s -> Int -> B.ByteString -> m (Ptr a) -writeStringAt src o bsname = do - (p,cnt) <- unsafeIOToPrim $ B.unsafeUseAsCStringLen bsname return - let nptr = ptr (mutableByteArrayContents src) `plusPtr` o - copyAddr (adr nptr) (adr p) cnt - writeOffAddr (adr nptr) cnt (0 :: Word8) - return nptr - -data Xd3Source - -foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Xd3Stream -> Ptr Xd3Source -> IO ErrorCode - -set_source :: PrimMonad m => - Stream m -> String -- ^ name for debug/print purposes. - -> Usize_t -- ^ block size - -> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size). - -- Rounds up to approx 16k. - -> m () -set_source stream nm blksz maxwinsz = do - let bsname = encodeUtf8 $ T.pack nm - src <- newPinnedByteArray $ 1 + B.length bsname + #{const sizeof(xd3_source)} - nptr <- writeStringAt src #{const sizeof(xd3_source)} bsname - writeAtByte src #{offset xd3_source, blksize } blksz - writeAtByte src #{offset xd3_source, name } nptr - writeAtByte src #{offset xd3_source, max_winsize} maxwinsz - writeAtByte src #{offset xd3_source, curblkno } (maxBound :: Xoff_t) - {- - writeAtByte (streamArray stream) - #{offset xd3_stream, getblk} - nullPtr -- xdelta3.h documents this as an internal field. - -} - let strm = ptr (mutableByteArrayContents $ streamArray stream) - srcptr = ptr (mutableByteArrayContents src) - unsafeIOToPrim (xd3_set_source strm srcptr) - writeMutVar (streamSource stream) (Just src) - -data XDeltaMethods m u = XDeltaMethods - { xConfig :: Config - , xGetSource :: Xoff_t -> B.ByteString - , xOutput :: Ptr Word8 -> Int -> m u - , xOnError :: ErrorCode -> String -> m u - , xBlockSize :: Usize_t - , xInterleave :: forall a. m a -> m a - } - --- -- | Checks for legal flag changes. --- foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Xd3Stream -> Flags -> IO () - -setFlag :: PrimMonad m => Flags -> Stream m -> Bool -> m () -setFlag b stream wantFlush = do - f <- readAtByte (streamArray stream) #{offset xd3_stream, flags} - writeAtByte (streamArray stream) #{offset xd3_stream, flags} - . (coerce :: Flags -> Word32) - $ if wantFlush then Flags f .|. b - else Flags f .&. complement b - -setFlush :: PrimMonad m => Stream m -> Bool -> m () -setFlush = setFlag XD3_FLUSH - -setSkipWindow :: PrimMonad m => Stream m -> Bool -> m () -setSkipWindow = setFlag XD3_SKIP_WINDOW - --- -- declared static --- foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Xd3Stream -> Ptr a -> Usize_t -> IO () - -avail_input :: PrimMonad m => Stream m -> Ptr a -> Usize_t -> m () -avail_input stream p sz = do - writeAtByte (streamArray stream) #{offset xd3_stream, next_in} p - writeAtByte (streamArray stream) #{offset xd3_stream, avail_in} sz - --- | This acknowledges receipt of output data, must be called after any --- XD3_OUTPUT return. --- -- declared static --- foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Xd3Stream -> IO () - -nextOut :: PrimMonad m => Stream m -> ((Ptr Word8, Usize_t) -> m a) -> m a -nextOut stream action = do - buf <- (,) - <$> readAtByte (streamArray stream) #{offset xd3_stream, next_out} - <*> readAtByte (streamArray stream) #{offset xd3_stream, avail_out} - a <- action buf - -- unsafeIOToPrim $ xd3_consume_output (ptr $ mutableByteArrayContents $ streamArray stream) - writeAtByte (streamArray stream) #{offset xd3_stream, avail_out} (0 :: Usize_t) - return a - - -requestedBlockNumber :: PrimMonad m => Stream m -> m (Maybe Xoff_t) -requestedBlockNumber stream = do - msrc <- readMutVar $ streamSource stream - forM msrc $ \src -> readAtByte src #offset xd3_source, getblkno - -data CurrentBlock = CurrentBlock - { blkno :: !Xoff_t -- ^ current block number - , blkSize :: !Usize_t -- ^ number of bytes on current block: must be >= 0 and <= 'srcBlockSize' - , blkPtr :: !(Ptr Word8) -- ^ current block array - } - --- -- declared static --- foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Xd3Stream -> IO CString - --- | Gives some extra information about the latest library error, if any --- is known. -errorString :: PrimMonad m => Stream m -> m String -errorString stream = do - cstring <- readAtByte (streamArray stream) #offset xd3_stream, msg - if cstring /= nullPtr - then unsafeIOToPrim $ peekCString cstring -- TODO: avoid unsafeIOToPrim - else return "" - -pokeCurrentBlock :: PrimMonad m => Stream m -> CurrentBlock -> m () -pokeCurrentBlock stream (CurrentBlock no sz ptr) = do - msrc <- readMutVar $ streamSource stream - forM_ msrc $ \src -> do - writeAtByte src #{offset xd3_source, curblkno} no - writeAtByte src #{offset xd3_source, onblk} sz - writeAtByte src #{offset xd3_source, curblk} ptr - - -withByteString :: PrimBase m => B.ByteString -> (Ptr b -> Usize_t -> m a) -> m a -withByteString d act = - let (fp,off,len) = B.toForeignPtr d - in unsafeIOToPrim $ withForeignPtr fp $ \ptr -> unsafePrimToIO $ do - act (ptr `plusPtr` off) (fromIntegral len) - -xdelta :: (PrimBase m, Monoid u) => XDeltaMethods m u -> (Stream m -> m ErrorCode) -> [B.ByteString] -> m u -xdelta x xxcode_input ds = do - mstream <- config_stream (xConfig x) - either (\e _ -> xOnError x e "config_stream failed") - (flip ($)) - mstream $ \stream -> do - set_source stream "VCDIFF" (xBlockSize x) (xBlockSize x) - let go withBlk [] = return mempty - go withBlk (d:ds) = do - let (fp,off,len) = B.toForeignPtr d - eof = null ds - when eof $ setFlush stream True - withByteString d $ \indata len -> do - avail_input stream indata len - go2 withBlk eof ds - go2 withBlk eof ds = do - ret <- withBlk $ xxcode_input stream - case ret of - XD3_INPUT -> if (not eof) then go withBlk ds else return mempty - XD3_OUTPUT -> do - m' <- nextOut stream (\(p,len) -> xOutput x p (fromIntegral len)) - ms <- xInterleave x $ go2 withBlk eof ds - return $ m' <> ms - XD3_GETSRCBLK -> do - Just n <- requestedBlockNumber stream - let blk = xGetSource x n - withBlk' act = withByteString blk $ \p len -> do - pokeCurrentBlock stream $ CurrentBlock n len p - when (len < xBlockSize x) $ do - Just src <- readMutVar $ streamSource stream - writeAtByte src #{offset xd3_source, eof_known} (1 :: #{type int}) - act - go2 withBlk' eof ds - XD3_GOTHEADER -> go2 withBlk eof ds -- No - XD3_WINSTART -> go2 withBlk eof ds -- action - XD3_WINFINISH -> go2 withBlk eof ds -- neccessary - -- -- These are set for each XD3_WINFINISH return. - -- xd3_encoder_used_source :: Ptr Stream -> IO Bool - -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t - -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t - e -> do - s <- errorString stream - xOnError x e s - xInterleave x $ go id ds - - -foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input :: Ptr Xd3Stream -> IO ErrorCode -foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input :: Ptr Xd3Stream -> IO ErrorCode - -decode_input :: PrimMonad m => Stream m -> m ErrorCode -decode_input stream = - unsafeIOToPrim $ xd3_decode_input (ptr $ mutableByteArrayContents $ streamArray stream) - -encode_input :: PrimMonad m => Stream m -> m ErrorCode -encode_input stream = - unsafeIOToPrim $ xd3_encode_input (ptr $ mutableByteArrayContents $ streamArray stream) - --- RFC 3284 -newtype VCDIFF = VCDIFF L.ByteString - deriving Show - -chunksOf :: Usize_t -> L.ByteString -> [B.ByteString] -chunksOf len bs | L.null bs = [] - | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs - in L.toStrict b : chunksOf len bs' - -computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF -computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched - -applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString -applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta - -data Result x = Result - { result :: x -- ^ A possibly invalid result. To consume a lazy stream with fusion, avoid - -- evaluating 'resultError' until this field is fully processed. - , resultError :: Maybe (ErrorCode,String) - -- ^ If something went wrong while producing 'result', this - -- is an error code and message indicating what. - } deriving (Show,Functor) - -instance Monoid x => Monoid (Result x) where - mempty = Result mempty Nothing - mappend (Result x xe) y = Result (mappend x $ result y) (maybe (resultError y) Just xe) - -xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> Result L.ByteString -xdeltaPure codec cfg source input = - let bsize = chunk_size cfg - ds = chunksOf bsize input - smap = IntMap.fromList $ zip [0..] (chunksOf bsize source) - x :: XDeltaMethods (ST s) (Result L.ByteString) - x = XDeltaMethods - { xConfig = cfg - , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of - Nothing -> B.empty - Just bs -> bs - , xOutput = \ptr len -> unsafeIOToST $ flip Result Nothing . L.fromStrict - <$> B.packCStringLen (castPtr ptr,len) - , xOnError = \e s -> return (Result L.empty (Just (e,s))) - , xBlockSize = bsize - , xInterleave = unsafeInterleaveST - } - in runST $ xdelta x codec ds - -defaultConfig :: Config -defaultConfig = Config - { winsize = XD3_DEFAULT_WINSIZE - , sprevsz = XD3_DEFAULT_SPREVSZ - , iopt_size = XD3_DEFAULT_IOPT_SIZE - , flags = mempty - , sec_data = CompressorConfig 0 0 0 - , sec_inst = CompressorConfig 0 0 0 - , sec_addr = CompressorConfig 0 0 0 - , smatch_cfg = Right SMATCH_DEFAULT - , chunk_size = 4096 - } diff --git a/haskell/XDelta/Types.hsc b/haskell/XDelta/Types.hsc deleted file mode 100644 index 8a60805..0000000 --- a/haskell/XDelta/Types.hsc +++ /dev/null @@ -1,208 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE PatternSynonyms #-} -module XDelta.Types where - -import Control.Exception -import Control.Monad -import Data.Bits -import qualified Data.ByteString as B -import qualified Data.ByteString.Internal as B -import Data.Function -import Data.Int -import Data.Monoid -import Data.Primitive.ByteArray -import qualified Data.Text as T -import Data.Text.Encoding -import Data.Word -import Foreign.C.String -import Foreign.C.Types -import Foreign.ForeignPtr -import Foreign.Marshal.Alloc -import Foreign.Marshal.Utils -import Foreign.Ptr -import Foreign.Storable -import System.IO -import System.IO.Error -import System.IO.Unsafe - - - -#ifndef SIZEOF_SIZE_T -#define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ -#define SIZEOF_UNSIGNED_INT __SIZEOF_INT__ -#define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__ -#define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__ -#define static_assert(...) -#endif -#include - -type Usize_t = #type usize_t --- | Printf code for type Usize_t -pattern W :: String -pattern W = #const_str W "" - -type Xoff_t = #type xoff_t --- | Printf code for type Xoff_t -pattern Q :: String -pattern Q = #const_str Q "" - - --- | These are the five ordinary status codes returned by the --- xd3_encode_input() and xd3_decode_input() state machines. --- --- An application must be prepared to handle these five return --- values from either xd3_encode_input or xd3_decode_input except --- in the case of no-source compression in which case XD3_GETSRCBLK --- is never returned. More detailed comments for these are given in --- xd3_encode_input and xd3_decode_input comments below. -newtype ErrorCode = ErrorCode CInt - deriving Show - -pattern XD3_SUCCESS = ErrorCode 0 - --- | need input -pattern XD3_INPUT = ErrorCode (#const XD3_INPUT) - --- | have output -pattern XD3_OUTPUT = ErrorCode (#const XD3_OUTPUT) - --- | need a block of source input (with no xd3_getblk function) a chance to do non-blocking read. -pattern XD3_GETSRCBLK = ErrorCode (#const XD3_GETSRCBLK) - --- | (decode-only) after the initial VCDIFF & first window header -pattern XD3_GOTHEADER = ErrorCode (#const XD3_GOTHEADER) - --- | notification: returned before a window is processed giving a chance to XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that window. -pattern XD3_WINSTART = ErrorCode (#const XD3_WINSTART) - --- | notification: returned after encode/decode & output for a window -pattern XD3_WINFINISH = ErrorCode (#const XD3_WINFINISH) - --- | (encoder only) may be returned by getblk() if the block is too old -pattern XD3_TOOFARBACK = ErrorCode (#const XD3_TOOFARBACK) - --- | internal error -pattern XD3_INTERNAL = ErrorCode (#const XD3_INTERNAL) - --- | invalid config -pattern XD3_INVALID = ErrorCode (#const XD3_INVALID) - --- | invalid input/decoder error -pattern XD3_INVALID_INPUT = ErrorCode (#const XD3_INVALID_INPUT) - --- | when secondary compression finds no improvement. -pattern XD3_NOSECOND = ErrorCode (#const XD3_NOSECOND) - --- | currently VCD_TARGET VCD_CODETABLE -pattern XD3_UNIMPLEMENTED = ErrorCode (#const XD3_UNIMPLEMENTED) - -instance Exception ErrorCode - -data Config = Config - { winsize :: Usize_t -- ^ The encoder window size. - -- The encoder allocates a buffer of this size if the - -- program supplies input in smaller units (unless the - -- XD3_FLUSH flag is set). - , sprevsz :: Usize_t -- ^ How far back small string matching goes - , iopt_size :: Usize_t -- ^ entries in the instruction-optimizing buffer - , flags :: Flags -- ^ stream->flags are initialized from xd3_config & never modified by the library. Use xd3_set_flags to modify flags settings mid-stream. - , sec_data :: CompressorConfig -- ^ Secondary compressor config: data - , sec_inst :: CompressorConfig -- ^ Secondary compressor config: inst - , sec_addr :: CompressorConfig -- ^ Secondary compressor config: addr - , smatch_cfg :: Either StringMatcher SMatchSelect -- ^ See enum: use fields below for soft config - , chunk_size :: Usize_t -- ^ Suggested chunking size for streaming. - } - -pattern XD3_DEFAULT_WINSIZE = #const XD3_DEFAULT_WINSIZE -pattern XD3_DEFAULT_SPREVSZ = #const XD3_DEFAULT_SPREVSZ -pattern XD3_DEFAULT_IOPT_SIZE = #const XD3_DEFAULT_IOPT_SIZE - -newtype Flags = Flags Word32 - deriving (Storable,Eq,Bits,FiniteBits) - --- used by VCDIFF tools, see xdelta3-main.h.--/ -pattern XD3_JUST_HDR = Flags (#const XD3_JUST_HDR) --- used by VCDIFF tools see xdelta3-main.h.--/ -pattern XD3_SKIP_WINDOW = Flags (#const XD3_SKIP_WINDOW) --- | used by VCDIFF tools, see xdelta3-main.h. */ -pattern XD3_SKIP_EMIT = Flags (#const XD3_SKIP_EMIT) --- | flush the stream buffer to prepare for xd3_stream_close(). */ -pattern XD3_FLUSH = Flags (#const XD3_FLUSH) --- | use DJW static huffman */ -pattern XD3_SEC_DJW = Flags (#const XD3_SEC_DJW) --- | use FGK adaptive huffman */ -pattern XD3_SEC_FGK = Flags (#const XD3_SEC_FGK) --- | use LZMA secondary */ -pattern XD3_SEC_LZMA = Flags (#const XD3_SEC_LZMA) -pattern XD3_SEC_TYPE = Flags (#const XD3_SEC_TYPE) --- | disable secondary compression of the data section. */ -pattern XD3_SEC_NODATA = Flags (#const XD3_SEC_NODATA) --- | disable secondary compression of the inst section. */ -pattern XD3_SEC_NOINST = Flags (#const XD3_SEC_NOINST) --- | disable secondary compression of the addr section. */ -pattern XD3_SEC_NOADDR = Flags (#const XD3_SEC_NOADDR) -pattern XD3_SEC_NOALL = Flags (#const XD3_SEC_NOALL) --- | enable checksum computation in the encoder. */ -pattern XD3_ADLER32 = Flags (#const XD3_ADLER32) --- | disable checksum verification in the decoder. */ -pattern XD3_ADLER32_NOVER = Flags (#const XD3_ADLER32_NOVER) --- | disable ordinary data * compression feature, only search * the source, not the target. */ -pattern XD3_NOCOMPRESS = Flags (#const XD3_NOCOMPRESS) --- | disable the "1.5-pass * algorithm", instead use greedy * matching. Greedy is off by * default. */ -pattern XD3_BEGREEDY = Flags (#const XD3_BEGREEDY) --- | used by "recode". */ -pattern XD3_ADLER32_RECODE = Flags (#const XD3_ADLER32_RECODE) --- 4 bits to set the compression level the same as the command-line --- setting -1 through -9 Flags (-0 corresponds to the XD3_NOCOMPRESS flag --- and is independent of compression level). This is for --- convenience especially with xd3_encode_memoryFlags (). */ -pattern XD3_COMPLEVEL_SHIFT = #const XD3_COMPLEVEL_SHIFT -pattern XD3_COMPLEVEL_MASK = Flags (#const XD3_COMPLEVEL_MASK) -pattern XD3_COMPLEVEL_1 = Flags (#const XD3_COMPLEVEL_1) -pattern XD3_COMPLEVEL_2 = Flags (#const XD3_COMPLEVEL_2) -pattern XD3_COMPLEVEL_3 = Flags (#const XD3_COMPLEVEL_3) -pattern XD3_COMPLEVEL_6 = Flags (#const XD3_COMPLEVEL_6) -pattern XD3_COMPLEVEL_9 = Flags (#const XD3_COMPLEVEL_9) - -instance Monoid Flags where - mempty = Flags 0 - Flags a `mappend` Flags b = Flags (a .|. b) - --- | Settings for the secondary compressor. -data CompressorConfig = CompressorConfig - { ngroups :: Usize_t -- ^ Number of DJW Huffman groups. - , sector_size :: Usize_t -- ^ Sector size. - , inefficient :: #{type int} -- ^ If true, ignore efficiency check [avoid XD3_NOSECOND]. - } - --- | The values of this enumeration are set in xd3_config using the --- 'smatch_cfg' variable. It can be set to default, slow, fast, etc., --- and soft. -data SMatchSelect - = SMATCH_DEFAULT -- ^ Flags may contain XD3_COMPLEVEL bits, else default. - | SMATCH_SLOW - | SMATCH_FAST - | SMATCH_FASTER - | SMATCH_FASTEST - deriving Enum - --- | This type exists only to be a tag for Ptr to an underlying C-struct called --- xd3_stream. -data Xd3Stream - --- | This is the record of a pre-compiled configuration, a subset of --- xd3_config. (struct _xd3_smatcher) -data StringMatcher = StringMatcher - { smName :: String - , smStringMatch :: FunPtr (Ptr Xd3Stream -> ErrorCode) - , smLargeLook :: Usize_t - , smLargeStep :: Usize_t - , smSmallLook :: Usize_t - , smSmallChain :: Usize_t - , smSmallLchain :: Usize_t - , smMaxLazy :: Usize_t - , smLongEnough :: Usize_t - } diff --git a/haskell/offset.h b/haskell/offset.h new file mode 100644 index 0000000..0f344a2 --- /dev/null +++ b/haskell/offset.h @@ -0,0 +1,2 @@ + +#define hsc_off(t, f) hsc_printf("(Offset (%ld) :: Offset (%ld))", (long) offsetof (t, f), (long) offsetof (t, f)); diff --git a/lazy.hs b/lazy.hs new file mode 100644 index 0000000..5bbfa51 --- /dev/null +++ b/lazy.hs @@ -0,0 +1,30 @@ +{-# LANGUAGE NoMonomorphismRestriction #-} +import Control.Monad.ST +import Control.Monad.ST.Unsafe +import Data.VCDIFF +import qualified Data.ByteString as B + +step2 = undefined + +step1 = do + xs <- unsafeInterleaveST $ step2 + return $ 3 : xs + +test f = do + xs <- f $ step2 + return $ 3 : xs + +test2 f = withByteString B.empty $ \_ _ -> do + xs <- f $ step2 + return $ 3 : xs + +main = do + let xs = runST step1 + ys = runST (test id) + zs = runST (test unsafeInterleaveST) + vs = runST (test2 unsafeInterleaveST) + print $ take 1 xs + -- print $ take 1 ys + print $ take 1 zs + print $ take 1 vs + diff --git a/xdelta3.cabal b/xdelta3.cabal index bc8a81b..6907ebc 100644 --- a/xdelta3.cabal +++ b/xdelta3.cabal @@ -1,36 +1,27 @@ cabal-version: 2.2 name: xdelta -version: 3.2.0 +version: 3.1.0 synopsis: VCDIFF encoder/decoder. -- description: homepage: xdelta.org license: Apache-2.0 license-file: xdelta3/LICENSE -author: Josh MacDonald -maintainer: josh.macdonald@gmail.com --- copyright: +author: Joe Crayne +maintainer: joe@jerkface.net category: Data --- build-type: Custom build-type: Simple extra-source-files: xdelta3/*.h xdelta3/*.c library - exposed-modules: XDelta.Types - , Data.XDelta - , Data.BA + exposed-modules: Data.VCDIFF.Types + , Data.VCDIFF + , Data.Primitive.ByteArray.Util build-tools: hsc2hs - -- include-dirs: xdelta3 xdelta3_lib - -- extra-lib-dirs: xdelta3_lib include-dirs: haskell . - -- cc-options: -std=c++14 -Wno-literal-suffix - cxx-options: -Wno-literal-suffix -g cpp-options: -DNOT_MAIN=1 -DREGRESSION_TEST=1 -DSECONDARY_DJW=1 -DSECONDARY_FGK=1 -DXD3_MAIN=1 -DXD3_DEBUG=0 -DHAVE_CONFIG - -- cpp-options: -DHAVE_CONFIG_H -DSIZEOF_SIZE_T=__SIZEOF_SIZE_T__ -DSIZEOF_UNSIGNED_INT=__SIZEOF_INT__ -DSIZEOF_UNSIGNED_LONG=__SIZEOF_LONG__ - -- cpp-options: -DSIZEOF_UNSIGNED_LONG_LONG=__SIZEOF_LONG_LONG__ - -- -DHAVE_CONFIG_H=1 - -- -include xdelta3/build2/config.h -Wall -Wshadow -fno-builtin -Wextra -Wsign-compare -Wformat=2 -Wno-format-nonliteral -Wno-unused-parameter -Wno-unused-function extra-libraries: xdelta3, lzma, stdc++, m + cxx-options: -Wno-literal-suffix -g cxx-sources: haskell/xdelta3.cc hs-source-dirs: haskell -- cgit v1.2.3