Some documentation.

author: Joe Crayne <joe@jerkface.net> 2018-10-28 20:06:51 -0400
committer: Joe Crayne <joe@jerkface.net> 2018-10-28 20:06:51 -0400
commit: b344e040b82cfbdb8b82bebd397f18661d7f88c7 (patch)
tree: e6ccc916d52317bb3b8b9eacf20ccf0647feeadd
parent: aebdc1ce3ed5b53ba69dd3aa0f37d1ffefbf5c7d (diff)
2 files changed, 60 insertions, 24 deletions
diff --git a/haskell/Data/VCDIFF.hs b/haskell/Data/VCDIFF.hs
index 78f66ff..c48eb1a 100644
--- a/haskell/Data/VCDIFF.hs
+++ b/haskell/Data/VCDIFF.hs
@@ -11,23 +11,18 @@
 {-# LANGUAGE RankNTypes                 #-}
 {-# LANGUAGE TypeFamilies               #-}
 {-# LANGUAGE TypeOperators              #-}
-- |
+-- | Create and apply binary diffs (in the 'VCDIFF' format) to lazy bytestrings.
--
+module Data.VCDIFF
-- Create and apply binary diffs in the VCDIFF format.
+  ( VCDIFF
--
+  , encodeVCDIFF
-- To create a diff:
+  , decodeVCDIFF
--
+  , Config(..)
-- > diff = computeDiff defaultConfig source target
+  , defaultConfig
--
+  , Flags
-- To apply a change to produce an updated version:
+  , pattern XD3_ADLER32
--
+  , Result(..)
-- > target = applyPatch defaultConfig source diff
+  , computeDiff
--
+  , applyPatch) where
-- Unlike typical text patches, context is ignored and
-- there is no fuzz. This means the file you apply
-- the patch to must have identical contents to the source
-- used to create it.
-module Data.VCDIFF where
 import Control.Monad
 import Control.Monad.Primitive
@@ -211,21 +206,37 @@ encode_input :: PrimMonad m => Stream m -> m ErrorCode
 encode_input stream =
    unsafeIOToPrim $ xd3_encode_input (streamArrayPtr $ streamArray stream)
-- RFC 3284
+-- | A binary diff (or patch) in the VCDIFF format documented by RFC 3284.
-newtype VCDIFF = VCDIFF L.ByteString
+--
+-- When used as a patch, context is ignored and there is no fuzz. This means
+-- the file you apply the patch to must have identical contents to the source
+-- used to create it.  /WARNING:/ This won't be checked unless the 'XD3_ADLER32' flag
+-- was specified to 'computeDiff'.
+newtype VCDIFF = VCDIFF { encodeVCDIFF :: L.ByteString }
 deriving Show
+decodeVCDIFF :: L.ByteString -> Either String VCDIFF
+decodeVCDIFF = Right . VCDIFF
 chunksOf :: Usize_t -> L.ByteString -> [B.ByteString]
 chunksOf len bs | L.null bs = []
                | otherwise = let (b,bs') = L.splitAt (fromIntegral len) bs
                              in L.toStrict b : chunksOf len bs'
+-- | Compute a binary diff.  For most options, use 'defaultConfig', but you may
+-- want to set 'flags' to 'XD3_ADLER32' to add checksumming safety to the
+-- patch, and a larger 'chunk_size' may yield greater compression.
 computeDiff :: Config -> L.ByteString -> L.ByteString -> Result VCDIFF
 computeDiff cfg source patched = fmap VCDIFF $ xdeltaPure encode_input cfg source patched
+-- | Apply a patch.  It is okay to use 'defaultConfig' for most options, but
+-- you may want to specify an alternative 'chunk_size' for streaming.
 applyPatch :: Config -> L.ByteString -> VCDIFF -> Result L.ByteString
 applyPatch cfg source (VCDIFF delta) = xdeltaPure decode_input cfg source delta
+-- | The result of a computation that may fail.  On failure, the 'result' field
+-- is truncated or invalid.
 data Result x = Result
    { result :: x -- ^ A possibly invalid result.  To consume a lazy stream with fusion, avoid
                  -- evaluating 'resultError' until this field is fully processed.
@@ -257,6 +268,11 @@ xdeltaPure codec cfg source input =
                }
    in runST $ xdelta x codec ds
+-- | Sensible defaults.  All of these configuration items are passed on to the
+-- xdelta algorithm except 'chunk_size' which is used by 'computeDiff' and
+-- 'applyPatch' to divide the input into chunks (see 'chunksOf').
+--
+-- Consider enabling flags = 'XD3_ADLER32' for added safety.
 defaultConfig :: Config
 defaultConfig = Config
  { winsize    = XD3_DEFAULT_WINSIZE
diff --git a/haskell/examples/testdiff.hs b/haskell/examples/testdiff.hs
index 7e20dc5..3f6e3e2 100644
--- a/haskell/examples/testdiff.hs
+++ b/haskell/examples/testdiff.hs
@@ -10,22 +10,42 @@ source = "It could be said that Joe was here. I don't know what to do about it."
 patched :: L.ByteString
 patched = "It could be said that Joe, the magnificent, was here.  I don't know what to do about it."
+source2 :: L.ByteString
+source2 = "It could be said that Joe was absolutely here. I don't know what to do about it."
 delta :: Result VCDIFF
 delta = computeDiff defaultConfig source patched
+delta2 :: Result VCDIFF
+delta2 = computeDiff defaultConfig { flags = XD3_ADLER32 } source patched
 main = do
+    putStrLn "source"
    mapM_ putStrLn $ xxd2 0 (L.toStrict source)
    putStrLn ""
+    putStrLn "target"
    mapM_ putStrLn $ xxd2 0 (L.toStrict patched)
    putStrLn ""
+    case delta2 of
+        Result δ me -> do
+            let d = encodeVCDIFF δ
+            putStrLn "diff(XD3_ADLER32)"
+            mapM_ putStrLn $ xxd2 0 (L.toStrict d)
+            print me
+            putStrLn ""
+            let Result patched' pe = applyPatch defaultConfig source2 δ
+            putStrLn "patched(XD3_ADLER32)"
+            mapM_ putStrLn $ xxd2 0 (L.toStrict patched') -- $ L.take 48 patched')
+            print pe
+    putStrLn ""
    case delta of
-        Result δ@(VCDIFF d) me -> do
+        Result δ me -> do
-            -- mapM_ (mapM_ putStrLn . xxd2 0) (chunksOf 16 d)
+            let d = encodeVCDIFF δ
+            putStrLn "diff(default)"
            mapM_ putStrLn $ xxd2 0 (L.toStrict d)
            print me
            putStrLn ""
-            let Result patched' pe = applyPatch defaultConfig source δ
+            putStrLn "patched(default)"
+            let Result patched' pe = applyPatch defaultConfig source2 δ
            mapM_ putStrLn $ xxd2 0 (L.toStrict patched') -- $ L.take 48 patched')
            print pe
-            print ("source",source)
-            print ("patched",patched)
author	Joe Crayne <joe@jerkface.net>	2018-10-28 20:06:51 -0400
committer	Joe Crayne <joe@jerkface.net>	2018-10-28 20:06:51 -0400
commit	b344e040b82cfbdb8b82bebd397f18661d7f88c7 (patch)
tree	e6ccc916d52317bb3b8b9eacf20ccf0647feeadd
parent	aebdc1ce3ed5b53ba69dd3aa0f37d1ffefbf5c7d (diff)