{-# LANGUAGE CPP                      #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE TemplateHaskell          #-}
-- | This module contains fuctions and templates for building up and breaking
--   down packed bit structures. It's something like Erlang's bit-syntax (or,
--   actually, more like Python's struct module).
--
--   This code uses Data.ByteString which is included in GHC 6.5 and you can
--   get it for 6.4 at <http://www.cse.unsw.edu.au/~dons/fps.html>
module Data.BitSyntax (
  -- * Building bit structures
  -- | The core function here is makeBits, which is a perfectly normal function.
  --   Here's an example which makes a SOCKS4a request header:
  -- @
  --   makeBits [U8 4, U8 1, U16 80, U32 10, NullTerminated \"username\",
  --             NullTerminated \"www.haskell.org\"]
  -- @
  BitBlock(..),
  makeBits,
  -- * Breaking up bit structures
  -- | The main function for this is bitSyn, which is a template function and
  --   so you'll need to run with @-fth@ to enable template haskell
  --   <http://www.haskell.org/th/>.
  --
  --   To expand the function you use the splice command:
  -- @
  --   $(bitSyn [...])
  -- @
  --
  -- The expanded function has type @ByteString -> (...)@ where the elements of
  -- the tuple depend of the argument to bitSyn (that's why it has to be a template
  -- function).
  --
  -- Heres an example, translated from the Erlang manual, which parses an IP header:
  --
  -- @
  -- decodeOptions bs ([_, hlen], _, _, _, _, _, _, _, _, _)
  --   | hlen > 5  = return $ BS.splitAt (fromIntegral ((hlen - 5) * 4)) bs
  --   | otherwise = return (BS.empty, bs)
  -- @
  --
  -- @
  -- ipDecode = $(bitSyn [PackedBits [4, 4], Unsigned 1, Unsigned 2, Unsigned 2,
  --                      PackedBits [3, 13], Unsigned 1, Unsigned 1, Unsigned 2,
  --                      Fixed 4, Fixed 4, Context \'decodeOptions, Rest])
  -- @
  --
  -- @
  -- ipPacket = BS.pack [0x45, 0, 0, 0x34, 0xd8, 0xd2, 0x40, 0, 0x40, 0x06,
  --                     0xa0, 0xca, 0xac, 0x12, 0x68, 0x4d, 0xac, 0x18,
  --                     0x00, 0xaf]
  -- @
  --
  -- This function has several weaknesses compared to the Erlang version: The
  -- elements of the bit structure are not named in place, instead you have to
  -- do a pattern match on the resulting tuple and match up the indexes. The
  -- type system helps in this, but it's still not quite as nice.

  ReadType(..), bitSyn,

  -- I get errors if these aren't exported (Can't find interface-file
  -- declaration for Data.BitSyntax.decodeU16)
  decodeU8, decodeU16, decodeU32, decodeU16LE, decodeU32LE) where

import Language.Haskell.TH.Lib
import Language.Haskell.TH.Syntax

import qualified Data.ByteString as BS
import Data.Char (ord)
import Control.Monad
-- import Test.QuickCheck (Arbitrary(), arbitrary, Gen())

import Foreign

foreign import ccall unsafe "htonl" htonl :: Word32 -> Word32
foreign import ccall unsafe "htons" htons :: Word16 -> Word16

-- There's no good way to convert to little-endian. The htons functions only
-- convert to big endian and they don't have any little endian friends. So we
-- need to detect which kind of system we are on and act accordingly. We can
-- detect the type of system by seeing if htonl actaully doesn't anything (it's
-- the identity function on big-endian systems, of course). If it doesn't we're
-- on a big-endian system and so need to do the byte-swapping in Haskell because
-- the C functions are no-ops

-- | A native Haskell version of htonl for the case where we need to convert
--   to little-endian on a big-endian system
endianSwitch32 :: Word32 -> Word32
endianSwitch32 a = ((a .&. 0xff) `shiftL` 24) .|.
                   ((a .&. 0xff00) `shiftL` 8) .|.
                   ((a .&. 0xff0000) `shiftR` 8) .|.
                   (a `shiftR` 24)

-- | A native Haskell version of htons for the case where we need to convert
--   to little-endian on a big-endian system
endianSwitch16 :: Word16 -> Word16
endianSwitch16 a = ((a .&. 0xff) `shiftL` 8) .|.
                   (a `shiftR` 8)

littleEndian32 :: Word32 -> Word32
littleEndian32 a = if htonl 1 == 1
                     then endianSwitch32 a
                     else a

littleEndian16 :: Word16 -> Word16
littleEndian16 a = if htonl 1 == 1
                     then endianSwitch16 a
                     else a

data BitBlock = -- | Unsigned 8-bit int
                U8 Int |
                -- | Unsigned 16-bit int
                U16 Int |
                -- | Unsigned 32-bit int
                U32 Int |
                -- | Little-endian, unsigned 16-bit int
                U16LE Int |
                -- | Little-endian, unsigned 32-bit int
                U32LE Int |
                -- | Appends the string with a trailing NUL byte
                NullTerminated String |
                -- | Appends the string without any terminator
                RawString String |
                -- | Appends a ByteString
                RawByteString BS.ByteString |
                -- | Packs a series of bit fields together. The argument is
                --   a list of pairs where the first element is the size
                --   (in bits) and the second is the value. The sum of the
                --   sizes for a given PackBits must be a multiple of 8
                PackBits [(Int, Int)]
                deriving (Show)

-- Encodes a member of the Bits class as a series of bytes and returns the
-- ByteString of those bytes.
getBytes :: (Integral a, Bounded a, FiniteBits a) => a -> BS.ByteString
getBytes input =
    let getByte _ 0 = []
        getByte x remaining = (fromIntegral $ (x .&. 0xff)) :
                              getByte (shiftR x 8) (remaining - 1)
        in
        if (finiteBitSize input `mod` 8) /= 0
           then error "Input data bit size must be a multiple of 8"
           else BS.pack $ getByte input (finiteBitSize input `div` 8)

-- Performs the work behind PackBits
packBits :: (Word8, Int, [Word8])  -- ^ The current byte, the number of bits
                                   --   used in that byte and the (reverse)
                                   --   list of produced bytes
         -> (Int, Int)  -- ^ The size (in bits) of the value, and the value
         -> (Word8, Int, [Word8])  -- See first argument
packBits (current, used, bytes) (size, value) =
  if bitsWritten < size
    then packBits (0, 0, current' : bytes) (size - bitsWritten, value)
    else if used' == 8
           then (0, 0, current' : bytes)
           else (current', used', bytes)
  where
    top = size - 1
    topOfByte = 7 - used
    aligned = value `shift` (topOfByte - top)
    newBits = (fromIntegral aligned) :: Word8
    current' = current .|. newBits
    bitsWritten = min (8 - used) size
    used' = used + bitsWritten

bits :: BitBlock -> BS.ByteString
bits (U8 v) = BS.pack [((fromIntegral v) :: Word8)]
bits (U16 v) = getBytes ((htons $ fromIntegral v) :: Word16)
bits (U32 v) = getBytes ((htonl $ fromIntegral v) :: Word32)
bits (U16LE v) = getBytes (littleEndian16 $ fromIntegral v)
bits (U32LE v) = getBytes (littleEndian32 $ fromIntegral v)
bits (NullTerminated str) = BS.pack $ (map (fromIntegral . ord) str) ++ [0]
bits (RawString str) = BS.pack $ map (fromIntegral . ord) str
bits (RawByteString bs) = bs
bits (PackBits bitspec) =
  if (sum $ map fst bitspec) `mod` 8 /= 0
    then error "Sum of sizes of a bit spec must == 0 mod 8"
    else (\(_, _, a) -> BS.pack $ reverse a) $ foldl packBits (0, 0, []) bitspec

-- | Make a binary string from the list of elements given
makeBits :: [BitBlock] -> BS.ByteString
makeBits = BS.concat . (map bits)

data ReadType = -- | An unsigned number of some number of bytes. Valid
                --   arguments are 1, 2 and 4
                Unsigned Integer |
                -- | An unsigned, little-endian integer of some number of
                --   bytes. Valid arguments are 2 and 4
                UnsignedLE Integer |
                -- | A variable length element to be decoded by a custom
                --   function. The function's name is given as the single
                --   argument and should have type
                --   @Monad m => ByteString -> m (v, ByteString)@
                Variable Name |
                -- | Skip some number of bytes
                Skip Integer |
                -- | A fixed size field, the result of which is a ByteString
                --   of that length.
                Fixed Integer |
                -- | Decode a value and ignore it (the result will not be part
                --   of the returned tuple)
                Ignore ReadType |
                -- | Like variable, but the decoding function is passed the
                --   entire result tuple so far. Thus the function whose name
                --   passed has type
                --   @Monad m => ByteString -> (...) -> m (v, ByteString)@
                Context Name |
                -- | Takes the most recent element of the result tuple and
                --   interprets it as the length of this field. Results in
                --   a ByteString
                LengthPrefixed |
                -- | Decode a series of bit fields, results in a list of
                --   Integers. Each element of the argument is the length of
                --   the bit field. The sums of the lengths must be a multiple
                --   of 8
                PackedBits [Integer] |
                -- | Results in a ByteString containing the undecoded bytes so
                --   far. Generally used at the end to return the trailing body
                --   of a structure, it can actually be used at any point in the
                --   decoding to return the trailing part at that point.
                Rest

fromBytes :: (Num a, Bits a) => [a] -> a
fromBytes input =
    let dofb accum [] = accum
        dofb accum (x:xs) = dofb ((shiftL accum 8) .|. x) xs
        in
        dofb 0 $ reverse input

decodeU8 :: BS.ByteString -> Word8
decodeU8 = fromIntegral . head . BS.unpack
decodeU16 :: BS.ByteString -> Word16
decodeU16 = htons . fromBytes . map fromIntegral . BS.unpack
decodeU32 :: BS.ByteString -> Word32
decodeU32 = htonl . fromBytes . map fromIntegral . BS.unpack
decodeU16LE :: BS.ByteString -> Word16
decodeU16LE = littleEndian16 . fromBytes . map fromIntegral . BS.unpack
decodeU32LE :: BS.ByteString -> Word32
decodeU32LE = littleEndian32 . fromBytes . map fromIntegral . BS.unpack

decodeBits :: [Integer] -> BS.ByteString -> [Integer]
decodeBits sizes bs =
  reverse values
  where
    (values, _, _) = foldl unpackBits ([], 0, BS.unpack bitdata) sizes
    bytesize = (sum sizes) `shiftR` 3
    (bitdata, _) = BS.splitAt (fromIntegral bytesize) bs

unpackBits :: ([Integer], Integer, [Word8]) -> Integer -> ([Integer], Integer, [Word8])
unpackBits state size = unpackBitsInner 0 state size

unpackBitsInner :: Integer ->
                   ([Integer], Integer, [Word8]) ->
                   Integer ->
                   ([Integer], Integer, [Word8])
unpackBitsInner _ (output, used, []) _ = (output, used, [])
unpackBitsInner val (output, used, current : input) bitsToGet =
  if bitsToGet' > 0
    then unpackBitsInner val'' (output, 0, input) bitsToGet'
    else if used' < 8
           then (val'' : output, used', current'' : input)
           else (val'' : output, 0, input)
  where
    bitsAv = 8 - used
    bitsTaken = min bitsAv bitsToGet
    val' = val `shift` (fromIntegral bitsTaken)
    current' = current `shiftR` (fromIntegral (8 - bitsTaken))
    current'' = current `shiftL` (fromIntegral bitsTaken)
    val'' = val' .|. (fromIntegral current')
    bitsToGet' = bitsToGet - bitsTaken
    used' = used + bitsTaken

readElement :: ([Stmt], Name, [Name]) -> ReadType -> Q ([Stmt], Name, [Name])

readElement (stmts, inputname, tuplenames) (Context funcname) = do
  valname <- newName "val"
  restname <- newName "rest"

  let stmt = BindS (TupP [VarP valname, VarP restname])
                   (AppE (AppE (VarE funcname)
                               (VarE inputname))
#if MIN_VERSION_template_haskell(2,16,0)
                         (TupE $ map (Just . VarE) $ reverse tuplenames))
#else
                         (TupE $ map VarE $ reverse tuplenames))
#endif

  return (stmt : stmts, restname, valname : tuplenames)

readElement (stmts, inputname, tuplenames) (Fixed n) = do
  valname <- newName "val"
  restname <- newName "rest"
  let dec1 = ValD (TupP [VarP valname, VarP restname])
                  (NormalB $ AppE (AppE (VarE 'BS.splitAt)
                                        (LitE (IntegerL n)))
                                  (VarE inputname))
                  []

  return (LetS [dec1] : stmts, restname, valname : tuplenames)

readElement state@(_, _, tuplenames) (Ignore n) = do
  (a, b, _) <- readElement state n
  return (a, b, tuplenames)

readElement (stmts, inputname, tuplenames) LengthPrefixed = do
  valname <- newName "val"
  restname <- newName "rest"

  let sourcename = head tuplenames
      dec = ValD (TupP [VarP valname, VarP restname])
                 (NormalB $ AppE (AppE (VarE 'BS.splitAt)
                                       (AppE (VarE 'fromIntegral)
                                             (VarE sourcename)))
                                 (VarE inputname))
                 []

  return (LetS [dec] : stmts, restname, valname : tuplenames)

readElement (stmts, inputname, tuplenames) (Variable funcname) = do
  valname <- newName "val"
  restname <- newName "rest"

  let stmt = BindS (TupP [VarP valname, VarP restname])
                   (AppE (VarE funcname) (VarE inputname))

  return (stmt : stmts, restname, valname : tuplenames)

readElement (stmts, inputname, tuplenames) Rest = do
  restname <- newName "rest"
  let dec = ValD (VarP restname)
                 (NormalB $ VarE inputname)
                 []
  return (LetS [dec] : stmts, inputname, restname : tuplenames)

readElement (stmts, inputname, tuplenames) (Skip n) = do
  -- Expands to something like:
  --   rest = Data.ByteString.drop n input
  restname <- newName "rest"
  let dec = ValD (VarP restname)
                 (NormalB $ AppE (AppE (VarE 'BS.drop)
                                       (LitE (IntegerL n)))
                                 (VarE inputname))
                 []
  return (LetS [dec] : stmts, restname, tuplenames)

readElement state (Unsigned size) = do
  -- Expands to something like:
  --    (aval, arest) = Data.ByteString.splitAt 1 input
  --    a = BitSyntax.decodeU8 aval
  let decodefunc = case size of
                     1 -> 'decodeU8
                     2 -> 'decodeU16
                     _ -> 'decodeU32 -- Default to 32
  decodeHelper state (VarE decodefunc) size

readElement state (UnsignedLE size) = do
  -- Expands to something like:
  --    (aval, arest) = Data.ByteString.splitAt 1 input
  --    a = BitSyntax.decodeU8LE aval
  let decodefunc = case size of
                     2 -> 'decodeU16LE
                     _ -> 'decodeU32LE -- Default to 4
  decodeHelper state (VarE decodefunc) size

readElement state (PackedBits sizes) =
  if sum sizes `mod` 8 /= 0
    then error "Sizes of packed bits must == 0 mod 8"
    else decodeHelper state
                      (AppE (VarE 'decodeBits)
                            (ListE $ map (LitE . IntegerL) sizes))
                      ((sum sizes) `shiftR` 3)

decodeHelper :: ([Stmt], Name, [Name])      -> Exp
                                            -> Integer
                                            -> Q ([Stmt], Name, [Name])
decodeHelper (stmts, inputname, tuplenames) decodefunc size = do
  valname <- newName "val"
  restname <- newName "rest"
  tuplename <- newName "tup"
  let dec1 = ValD (TupP [VarP valname, VarP restname])
                  (NormalB $ AppE (AppE (VarE 'BS.splitAt)
                                        (LitE (IntegerL size)))
                                  (VarE inputname))
                  []
  let dec2 = ValD (VarP tuplename)
                  (NormalB $ AppE decodefunc (VarE valname))
                  []

  return (LetS [dec1, dec2] : stmts, restname, tuplename : tuplenames)

decGetName :: Dec -> Name
decGetName (ValD (VarP name) _ _) = name
decGetName _                      = undefined -- Error!

bitSyn :: [ReadType] -> Q Exp
bitSyn elements = do
    inputname <- newName "input"
    (stmts, restname, tuplenames) <- foldM readElement ([], inputname, []) elements
    returnS <- NoBindS `liftM` [| return $(tupE . map varE $ reverse tuplenames) |]
    return $ LamE [VarP inputname] (DoE . reverse $ returnS : stmts)


-- Tests
prop_bitPacking :: [(Int, Int)] -> Bool
prop_bitPacking fields =
  prevalues == (map fromIntegral postvalues) ||
  any (< 1) (map fst fields) ||
  any (< 0) (map snd fields)
  where
    undershoot = sum (map fst fields) `mod` 8
    fields' = if undershoot > 0
                then (8 - undershoot, 1) : fields
                else fields
    prevalues = map snd fields'
    packed = bits $ PackBits fields'
    postvalues = decodeBits (map (fromIntegral . fst) fields') packed

{-
instance Arbitrary Word16 where
  arbitrary = (arbitrary :: Gen Int) >>= return . fromIntegral
instance Arbitrary Word32 where
  arbitrary = (arbitrary :: Gen Int) >>= return . fromIntegral
-}

-- | This only works on little-endian machines as it checks that the foreign
--   functions (htonl and htons) match the native ones
prop_nativeByteShuffle32 :: Word32 -> Bool
prop_nativeByteShuffle32 x = endianSwitch32 x == htonl x
prop_nativeByteShuffle16 :: Word16 -> Bool
prop_nativeByteShuffle16 x = endianSwitch16 x == htons x
prop_littleEndian16 :: Word16 -> Bool
prop_littleEndian16 x = littleEndian16 x == x
prop_littleEndian32 :: Word32 -> Bool
prop_littleEndian32 x = littleEndian32 x == x