{-# LANGUAGE BangPatterns #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE PatternSynonyms #-} module XDelta where import Control.Exception import Control.Monad import Data.Bits import qualified Data.ByteString as B import qualified Data.ByteString.Internal as B import Data.Function import Data.Monoid import qualified Data.Text as T import Data.Text.Encoding import Data.Word import Foreign.C.Types import Foreign.C.String import Foreign.ForeignPtr import Foreign.Marshal.Alloc import Foreign.Marshal.Utils import Foreign.Ptr import Foreign.Storable import System.IO import System.IO.Error import System.IO.Unsafe import Data.VCDIFF.Types #ifndef SIZEOF_SIZE_T #define SIZEOF_SIZE_T __SIZEOF_SIZE_T__ #define SIZEOF_UNSIGNED_INT __SIZEOF_INT__ #define SIZEOF_UNSIGNED_LONG __SIZEOF_LONG__ #define SIZEOF_UNSIGNED_LONG_LONG __SIZEOF_LONG_LONG__ #define static_assert(...) #endif #include newtype Stream = Stream (ForeignPtr Stream) -- | Settings for the secondary compressor. data CompressorConfig = CompressorConfig { ngroups :: Usize_t -- ^ Number of DJW Huffman groups. , sector_size :: Usize_t -- ^ Sector size. , inefficient :: Int -- ^ If true, ignore efficiency check [avoid XD3_NOSECOND]. } matcher :: SMatchSelect -> StringMatcher matcher select = unsafePerformIO $ do let config = (init_config (mempty :: Flags)) { winsize = 40, smatch_cfg = Right select } Stream fptr <- throwXD $ config_stream config m <- withForeignPtr fptr $ \stream -> do let smatcher = (#ptr xd3_stream, smatcher) stream nmptr = (#ptr xd3_smatcher, name) smatcher nm <- peekCString nmptr StringMatcher nm <$> (#peek xd3_smatcher, string_match) smatcher <*> (#peek xd3_smatcher, large_look) smatcher <*> (#peek xd3_smatcher, large_step) smatcher <*> (#peek xd3_smatcher, small_look) smatcher <*> (#peek xd3_smatcher, small_chain) smatcher <*> (#peek xd3_smatcher, small_lchain) smatcher <*> (#peek xd3_smatcher, max_lazy) smatcher <*> (#peek xd3_smatcher, long_enough) smatcher finalizeForeignPtr fptr return m -- | Default configured value of stream->winsize. If the program -- supplies xd3_encode_input() with data smaller than winsize the -- stream will automatically buffer the input, otherwise the input -- buffer is used directly. pattern XD3_DEFAULT_WINSIZE = #const XD3_DEFAULT_WINSIZE -- 8 MiB instance Storable Config where sizeOf _ = #const sizeof(xd3_config) alignment _ = 1 poke p cfg = do (#poke xd3_config, winsize) p $ winsize cfg (#poke xd3_config, flags) p $ flags cfg peek p = do winsize <- (#peek xd3_config, winsize) p flags <- (#peek xd3_config, flags) p return Config { winsize = winsize , flags = flags } type CGetBlk = Ptr Stream -> Ptr Xd3_source -> Xoff_t -> IO CInt foreign import ccall "wrapper" wrapGetBlk :: CGetBlk -> IO (FunPtr CGetBlk) foreign import ccall "wrapper" wrapFinalizer :: (Ptr Stream -> IO ()) -> IO (FunPtr (Ptr Stream -> IO ())) -- | For convenience, zero & initialize the xd3_config structure with specified -- flags. init_config :: Flags -> Config init_config flags = Config { winsize = 0 , flags = flags } foreign import ccall "xdelta3.h xd3_config_stream" xd3_config_stream :: Ptr Stream -> Ptr xd3_config -> IO ErrorCode -- | xd3_free_stream frees all memory allocated for the stream. The -- application is responsible for freeing any of the resources it -- supplied. foreign import ccall "xdelta3.h xd3_free_stream" xd3_free_stream :: Ptr Stream -> IO () -- The xd3_config structure is used to initialize a stream - all data -- is copied into stream so config may be a temporary variable. See -- the [documentation] or comments on the xd3_config structure. config_stream :: Config -> IO (Either ErrorCode Stream) config_stream cfg = do pstream <- callocBytes (#const sizeof(xd3_stream)) fptr <- newForeignPtr finalizerFree pstream wrapFinalizer xd3_free_stream >>= (`addForeignPtrFinalizer` fptr) wrapFinalizer unset_source >>= (`addForeignPtrFinalizer` fptr) wrapFinalizer unset_header >>= (`addForeignPtrFinalizer` fptr) with cfg $ \pcfg -> do code <- xd3_config_stream pstream pcfg case code of ErrorCode 0 -> return . Right $ Stream fptr _ -> do free pstream return . Left $ code throwXD :: IO (Either ErrorCode a) -> IO a throwXD action = action >>= either throwIO return example_setup :: IO Stream example_setup = do let config = (init_config (mempty :: Flags)) { winsize = 32768 } stream <- throwXD $ config_stream config return stream newtype Xd3_source = Xd3_source (Ptr Xd3_source) data CurrentBlock = CurrentBlock { blkno :: !Xoff_t -- ^ current block number , blkSize :: !Usize_t -- ^ number of bytes on current block: must be >= 0 and <= 'srcBlockSize' , blkPtr :: !(Ptr Word8) -- ^ current block array } -- XD3_TOOFARBACK block is too old -- XD3_INVALID_INPUT type GetBlock = CurrentBlock -> Xoff_t -> IO (Either ErrorCode CurrentBlock) data Source = Source { srcName :: String -- ^ name for debug/print purposes , srcBlockSize :: Usize_t -- ^ block size , srcMaxWinSize :: Xoff_t -- ^ maximum visible buffer , srcGetBlock :: Maybe GetBlock } foreign import ccall "xdelta3.h xd3_set_source" xd3_set_source :: Ptr Stream -> Ptr Xd3_source -> IO ErrorCode requestedBlockNumber :: Stream -> IO Xoff_t requestedBlockNumber (Stream fptr) = withForeignPtr fptr $ \stream -> do psrc <- (#peek xd3_stream, src) stream (#peek xd3_source, getblkno) psrc peekCurrentBlock :: Stream -> IO CurrentBlock peekCurrentBlock (Stream fptr) = withForeignPtr fptr $ \stream -> do psrc <- (#peek xd3_stream, src) stream CurrentBlock <$> (#peek xd3_source, curblkno) psrc <*> (#peek xd3_source, onblk) psrc <*> (#peek xd3_source, curblk) psrc pokeCurrentBlock :: Stream -> CurrentBlock -> IO () pokeCurrentBlock (Stream fptr) (CurrentBlock no sz ptr) = withForeignPtr fptr $ \stream -> do psrc <- (#peek xd3_stream, src) stream (#poke xd3_source, curblkno) psrc no (#poke xd3_source, onblk ) psrc sz (#poke xd3_source, curblk ) psrc ptr nextOut :: Stream -> ((Ptr Word8, Int) -> IO a) -> IO a nextOut (Stream fptr) action = withForeignPtr fptr $ \stream -> do buf <- (,) <$> (#peek xd3_stream, next_out) stream <*> (#peek xd3_stream, avail_out) stream a <- action buf xd3_consume_output stream return a unset_source :: Ptr Stream -> IO () unset_source stream = do previous_src <- (#peek xd3_stream, src) stream when (previous_src /= nullPtr) $ free previous_src (#poke xd3_stream, src) stream nullPtr unset_header :: Ptr Stream -> IO () unset_header stream = do hdr <- (#peek xd3_stream, enc_appheader) stream when (hdr /= nullPtr) $ free hdr (#poke xd3_stream, enc_appheader) stream nullPtr -- | This function informs the encoder or decoder that source matching -- (i.e., delta-compression) is possible. For encoding, this should -- be called before the first xd3_encode_input. A NULL source is -- ignored. For decoding, this should be called before the first -- window is decoded, but the appheader may be read first -- (XD3_GOTHEADER). After decoding the header, call xd3_set_source() -- if you have a source file. Note: if (stream->dec_win_ind & VCD_SOURCE) -- is true, it means the first window expects there to be a source file. set_source :: Stream -> Source -> IO ErrorCode set_source (Stream fptr) src = withForeignPtr fptr $ \stream -> do unset_source stream let bsname = encodeUtf8 $ T.pack $ srcName src psrc <- callocBytes $ 1 + B.length bsname + (#const sizeof(xd3_source)) let pname = castPtr psrc `plusPtr` (#const sizeof(xd3_source)) copyname ptr (w:ws) = poke ptr w >> copyname (plusPtr ptr 1) ws copyname ptr [] = poke ptr (0 :: Word8) copyname pname (B.unpack bsname) (#poke xd3_source, blksize ) psrc $ srcBlockSize src (#poke xd3_source, name ) psrc pname (#poke xd3_source, max_winsize) psrc $ srcMaxWinSize src (#poke xd3_source, curblkno ) psrc (maxBound :: Xoff_t) srcGetBlock src `forM_` \getBlock -> do cgetblk <- wrapGetBlk $ \stream psrc xoff -> do curblk <- CurrentBlock <$> (#peek xd3_source, curblkno) psrc <*> (#peek xd3_source, onblk) psrc <*> (#peek xd3_source, curblk) psrc ret <- getBlock curblk xoff case ret of Left (ErrorCode ecode) -> return ecode Right (CurrentBlock no sz ptr) -> do (#poke xd3_source, curblkno) psrc no (#poke xd3_source, onblk ) psrc sz (#poke xd3_source, curblk ) psrc ptr return 0 (#poke xd3_stream, getblk) stream cgetblk -- Warning: xdelta3.h documents this as an internal field. -- It's possible to comply with the documentation using the -- 'ioh' field to dispatch, but that would be awkward and -- inefficient. xd3_set_source stream psrc -- | Checks for legal flag changes. foreign import ccall "xdelta3.h xd3_set_flags" xd3_set_flags :: Ptr Stream -> Flags -> IO () foreign import ccall "xdelta3.h xd3_avail_input" xd3_avail_input :: Ptr Stream -> Ptr a -> Usize_t -> IO () foreign import ccall "xdelta3.h xd3_decode_input" xd3_decode_input :: Ptr Stream -> IO ErrorCode foreign import ccall "xdelta3.h xd3_encode_input" xd3_encode_input :: Ptr Stream -> IO ErrorCode set_flags (Stream fp) f = withForeignPtr fp $ (`xd3_set_flags` f) avail_input (Stream fp) p sz = withForeignPtr fp (\stream -> xd3_avail_input stream p sz) decode_input (Stream fp) = withForeignPtr fp xd3_decode_input encode_input (Stream fp) = withForeignPtr fp xd3_encode_input -- xd3_get_appheader may be called in the decoder after XD3_GOTHEADER. -- For convenience, the decoder always adds a single byte padding to -- the end of the application header, which is set to zero in case the -- application header is a string. foreign import ccall "xdelta3.h xd3_get_appheader" xd3_get_appheader :: Ptr Stream -> Ptr (Ptr Word8) -> Ptr Usize_t -> IO ErrorCode foreign import ccall "xdelta3.h xd3_set_appheader" xd3_set_appheader :: Ptr Stream -> Ptr Word8 -> Usize_t -> IO () -- output//source/ appHeader :: Stream -> IO (Maybe String) appHeader (Stream fptr) = withForeignPtr fptr $ \stream -> alloca $ \pp -> alloca $ \psz -> do xd3_get_appheader stream pp psz >>= \case XD3_SUCCESS -> peek pp >>= \case hdr | hdr == nullPtr -> return Nothing | otherwise -> do sz <- peek psz Just <$> peekCStringLen (castPtr pp,fromIntegral sz) _ -> return Nothing -- | This should be called before the first call to xd3_encode_input() to -- include application-specific data in the VCDIFF header. -- -- Note: This is used for encoding and is not the same field retrieved by -- 'appHeader'. setAppHeader :: Stream -> String -> IO () setAppHeader (Stream fptr) hdr = withForeignPtr fptr $ \stream -> do withCStringLen hdr $ \(dta,sz) -> do phdr <- mallocBytes sz copyBytes phdr (castPtr dta) sz xd3_set_appheader stream phdr (fromIntegral sz) sourceFromHandle :: Usize_t -> Ptr Word8 -> Handle -> Source sourceFromHandle blksize ptr h = Source { srcName = "sourceFromHandle" , srcBlockSize = blksize , srcMaxWinSize = blksize , srcGetBlock = Just $ \_ num -> do hSeek h AbsoluteSeek (fromIntegral blksize * fromIntegral num) cnt <- hGetBuf h ptr (fromIntegral blksize) return $ Right (CurrentBlock num (fromIntegral cnt) ptr) `catchIOError` \_ -> return $ Left XD3_TOOFARBACK } example_set_source stream = do h <- openFile "source-file.bin" ReadMode buf <- mallocBytes 32768 let source = (sourceFromHandle 32768 buf h) { srcName = "source-file.bin" } ret <- set_source stream source return ret withFileSource :: Config -> FilePath -> Int -> (Stream -> IO a) -> IO a withFileSource cfg fname blksize action = do stream <- throwXD $ config_stream cfg withFile fname ReadMode $ \h -> do allocaBytes blksize $ \buf -> do code <- set_source stream (sourceFromHandle (fromIntegral blksize) buf h) { srcName = fname } case code of XD3_SUCCESS -> action stream _ -> throwIO code example_input_loop stream inp = do allocaBytes 4096 $ \indata -> fix $ \loop -> do insize <- hGetBuf inp indata 4096 let reached_EOF = insize < 4096 when reached_EOF $ set_flags stream XD3_FLUSH avail_input stream indata (fromIntegral insize) fix $ \process -> do ret <- decode_input stream case ret of XD3_INPUT -> when (not reached_EOF) loop XD3_OUTPUT -> do -- todo write data process XD3_GETSRCBLK -> do -- todo set source block process XD3_GOTHEADER -> process -- No XD3_WINSTART -> process -- action XD3_WINFINISH -> process -- neccessary _ -> throwIO (userError "Unexpected return code from decode_input.") {- data BufferEater m where Lazy :: (ByteString -> m -> m) -> BufferEater m Strict :: (ByteString -> IO ()) -> BufferEater () -} data XDeltaMethods m = XDeltaMethods { xConfig :: Config , xGetSource :: Xoff_t -> B.ByteString , xOutput :: Ptr Word8 -> Int -> IO m , xOnError :: ErrorCode -> String -> IO m , xBlockSize :: Usize_t } -- | This acknowledges receipt of output data, must be called after any -- XD3_OUTPUT return. foreign import ccall "xdelta3.h xd3_consume_output" xd3_consume_output :: Ptr Stream -> IO () -- -- built in to 'nextOut' -- acknowledgeOutput :: Stream -> IO () -- acknowledgeOutput (Stream fptr) = withForeignPtr fptr $ xd3_consume_output foreign import ccall "xdelta3.h xd3_errstring" xd3_errstring :: Ptr Stream -> IO CString -- | Gives some extra information about the latest library error, if any -- is known. errorString (Stream fptr) = withForeignPtr fptr $ \stream -> do cstring <- xd3_errstring stream peekCString cstring xdelta :: Monoid m => XDeltaMethods m -> (Stream -> IO ErrorCode) -> [B.ByteString] -> IO m xdelta x xxcode_input ds = do stream <- throwXD $ config_stream (xConfig x) set_source stream Source { srcName = "XDeltaMethods" , srcBlockSize = xBlockSize x , srcMaxWinSize = xBlockSize x , srcGetBlock = Nothing } let go withBlk (d:ds) = do let (fp,off,len) = B.toForeignPtr d withForeignPtr fp $ \indata0 -> do let indata = indata0 `plusPtr` off eof = null ds when eof $ set_flags stream XD3_FLUSH avail_input stream indata (fromIntegral len) go2 withBlk eof ds go2 withBlk eof ds = do ret <- withBlk $ xxcode_input stream case ret of XD3_INPUT -> if (not eof) then go withBlk ds else return mempty XD3_OUTPUT -> do m' <- nextOut stream (uncurry $ xOutput x) ms <- unsafeInterleaveIO $ go2 withBlk eof ds return $ m' <> ms XD3_GETSRCBLK -> do n <- requestedBlockNumber stream let blk = xGetSource x n withBlk' act = let (fp,off,len) = B.toForeignPtr blk in withForeignPtr fp $ \p -> do pokeCurrentBlock stream $ CurrentBlock n (fromIntegral len) (plusPtr p off) act go2 withBlk' eof ds XD3_GOTHEADER -> go2 withBlk eof ds -- No XD3_WINSTART -> go2 withBlk eof ds -- action XD3_WINFINISH -> go2 withBlk eof ds -- neccessary -- -- These are set for each XD3_WINFINISH return. -- xd3_encoder_used_source :: Ptr Stream -> IO Bool -- xd3_encoder_srcbase :: Ptr Stream -> IO Xoff_t -- xd3_encoder_srclen :: Ptr Stream -> IO Usize_t e -> do s <- errorString stream xOnError x e s go id ds