From 05ae232e966eccae46d535126a579740b6cf780d Mon Sep 17 00:00:00 2001 From: Joe Crayne Date: Thu, 25 Oct 2018 23:39:44 -0400 Subject: Added chunk_size parameter to high-level streaming interface. --- examples/testdiff.hs | 8 +++-- haskell/Data/XDelta.hsc | 81 +++++++++++++++++++++++++++--------------------- haskell/XDelta/Types.hsc | 1 + 3 files changed, 51 insertions(+), 39 deletions(-) diff --git a/examples/testdiff.hs b/examples/testdiff.hs index 9360545..2847fc3 100644 --- a/examples/testdiff.hs +++ b/examples/testdiff.hs @@ -10,7 +10,7 @@ source = "It could be said that Joe was here. I don't know what to do about it." patched :: L.ByteString patched = "It could be said that Joe, the magnificent, was here. I don't know what to do about it." -delta :: XDeltaFailable XDelta +delta :: Result VCDIFF delta = computeDiff defaultConfig source patched main = do @@ -19,11 +19,13 @@ main = do mapM_ putStrLn $ xxd2 0 (L.toStrict patched) putStrLn "" case delta of - XResult δ@(XDelta d) me -> do + Result δ@(VCDIFF d) me -> do -- mapM_ (mapM_ putStrLn . xxd2 0) (chunksOf 16 d) mapM_ putStrLn $ xxd2 0 (L.toStrict d) print me putStrLn "" - let XResult patched' pe = applyPatch defaultConfig source δ + let Result patched' pe = applyPatch defaultConfig source δ mapM_ putStrLn $ xxd2 0 (L.toStrict patched') -- $ L.take 48 patched') print pe + print ("source",source) + print ("patched",patched) diff --git a/haskell/Data/XDelta.hsc b/haskell/Data/XDelta.hsc index bb08cb6..8128a61 100644 --- a/haskell/Data/XDelta.hsc +++ b/haskell/Data/XDelta.hsc @@ -140,7 +140,7 @@ config_stream cfg = do xd3_abort_stream sptr xd3_close_stream sptr xd3_free_stream sptr - seq s $ return () -- Keep array s alive until the ffi functions finish. + seq srcvar $ seq s $ return () -- Keep array s alive until the ffi functions finish. fp <- newForeignPtr sptr finalize return Stream { streamArray = s @@ -166,7 +166,8 @@ foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Xd3Stream set_source :: PrimMonad m => Stream m -> String -- ^ name for debug/print purposes. -> Usize_t -- ^ block size - -> Xoff_t -- ^ maximum visible buffer + -> Xoff_t -- ^ maximum visible buffer (Suggested: set same as block size). + -- Rounds up to approx 16k. -> m () set_source stream nm blksz maxwinsz = do let bsname = encodeUtf8 $ T.pack nm @@ -280,7 +281,7 @@ xdelta x xxcode_input ds = do either (\e _ -> xOnError x e "config_stream failed") (flip ($)) mstream $ \stream -> do - set_source stream "xdelta" (xBlockSize x) (xBlockSize x) + set_source stream "VCDIFF" (xBlockSize x) (xBlockSize x) let go withBlk [] = return mempty go withBlk (d:ds) = do let (fp,off,len) = B.toForeignPtr d @@ -331,7 +332,8 @@ encode_input :: PrimMonad m => Stream m -> m ErrorCode encode_input stream = unsafeIOToPrim $ xd3_encode_input (ptr $ mutableByteArrayContents $ streamArray stream) -newtype XDelta = XDelta L.ByteString +-- RFC 3284 +newtype VCDIFF = VCDIFF L.ByteString deriving Show chunksOf :: Usize_t -> L.ByteString -> [B.ByteString] @@ -339,45 +341,52 @@ chunksOf len bs | L.null bs = [] | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs in L.toStrict b : chunksOf len bs' -computeDiff :: Config -> L.ByteString -> L.ByteString -> XDeltaFailable XDelta -computeDiff cfg source patched = fmap XDelta $ xdeltaPure encode_input cfg source patched - -applyPatch :: Config -> L.ByteString -> XDelta -> XDeltaFailable L.ByteString -applyPatch cfg source (XDelta delta) = xdeltaPure decode_input cfg source delta - -data XDeltaFailable x = XResult { xresult :: x, xerror :: (Maybe (ErrorCode,String)) } - deriving (Show,Functor) - -instance Monoid x => Monoid (XDeltaFailable x) where - mempty = XResult mempty Nothing - mappend (XResult x xe) y = XResult (mappend x $ xresult y) (maybe (xerror y) Just xe) - -xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> XDeltaFailable L.ByteString -xdeltaPure codec cfg source delta = - let smap = IntMap.fromList $ zip [0..] (chunksOf bsize source) - bsize = 4096 - x :: XDeltaMethods (ST s) (XDeltaFailable L.ByteString) +computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF +computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched + +applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString +applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta + +data Result x = Result + { result :: x -- ^ A possibly invalid result. To consume a lazy stream with fusion, avoid + -- evaluating 'resultError' until this field is fully processed. + , resultError :: Maybe (ErrorCode,String) + -- ^ If something went wrong while producing 'result', this + -- is an error code and message indicating what. + } deriving (Show,Functor) + +instance Monoid x => Monoid (Result x) where + mempty = Result mempty Nothing + mappend (Result x xe) y = Result (mappend x $ result y) (maybe (resultError y) Just xe) + +xdeltaPure :: (forall s. Stream (ST s) -> ST s ErrorCode) -> Config -> L.ByteString -> L.ByteString -> Result L.ByteString +xdeltaPure codec cfg source input = + let bsize = chunk_size cfg + ds = chunksOf bsize input + smap = IntMap.fromList $ zip [0..] (chunksOf bsize source) + x :: XDeltaMethods (ST s) (Result L.ByteString) x = XDeltaMethods - { xConfig = cfg - , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of + { xConfig = cfg + , xGetSource = \i -> case IntMap.lookup (fromIntegral i) smap of Nothing -> B.empty Just bs -> bs - , xOutput = \ptr len -> unsafeIOToST $ flip XResult Nothing . L.fromStrict <$> B.packCStringLen (castPtr ptr,len) - , xOnError = \e s -> return (XResult L.empty (Just (e,s))) -- :: ErrorCode -> String -> m u - , xBlockSize = bsize -- :: Usize_t - , xInterleave = unsafeInterleaveST -- :: forall a. m a -> m a + , xOutput = \ptr len -> unsafeIOToST $ flip Result Nothing . L.fromStrict + <$> B.packCStringLen (castPtr ptr,len) + , xOnError = \e s -> return (Result L.empty (Just (e,s))) + , xBlockSize = bsize + , xInterleave = unsafeInterleaveST } - ds = chunksOf bsize delta -- L.toChunks delta in runST $ xdelta x codec ds defaultConfig :: Config defaultConfig = Config - { winsize = XD3_DEFAULT_WINSIZE - , sprevsz = XD3_DEFAULT_SPREVSZ - , iopt_size = XD3_DEFAULT_IOPT_SIZE - , flags = mempty - , sec_data = CompressorConfig 0 0 0 - , sec_inst = CompressorConfig 0 0 0 - , sec_addr = CompressorConfig 0 0 0 + { winsize = XD3_DEFAULT_WINSIZE + , sprevsz = XD3_DEFAULT_SPREVSZ + , iopt_size = XD3_DEFAULT_IOPT_SIZE + , flags = mempty + , sec_data = CompressorConfig 0 0 0 + , sec_inst = CompressorConfig 0 0 0 + , sec_addr = CompressorConfig 0 0 0 , smatch_cfg = Right SMATCH_DEFAULT + , chunk_size = 4096 } diff --git a/haskell/XDelta/Types.hsc b/haskell/XDelta/Types.hsc index a0f2cfa..8a60805 100644 --- a/haskell/XDelta/Types.hsc +++ b/haskell/XDelta/Types.hsc @@ -113,6 +113,7 @@ data Config = Config , sec_inst :: CompressorConfig -- ^ Secondary compressor config: inst , sec_addr :: CompressorConfig -- ^ Secondary compressor config: addr , smatch_cfg :: Either StringMatcher SMatchSelect -- ^ See enum: use fields below for soft config + , chunk_size :: Usize_t -- ^ Suggested chunking size for streaming. } pattern XD3_DEFAULT_WINSIZE = #const XD3_DEFAULT_WINSIZE -- cgit v1.2.3