summaryrefslogtreecommitdiff
path: root/src/Data/BEncode/Internal.hs
blob: 1532fe6f021e8d0208dbbe5cac98c123bc5671d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
-- |
--   Copyright   :  (c) Sam Truzjan 2013
--   License     :  BSD3
--   Maintainer  :  pxqr.sta@gmail.com
--   Stability   :  stable
--   Portability :  portable
--
--   This module provides bencode values serialization. Normally, you
--   don't need to import this module, use 'Data.BEncode' instead.
--
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE CPP #-}
module Data.BEncode.Internal
       ( -- * Parsing
         parser
       , parse

         -- * Rendering
       , builder
       , build
       , ppBEncode
       ) where

import Control.Applicative
import           Data.Attoparsec.ByteString.Char8 (Parser)
import qualified Data.Attoparsec.ByteString.Char8 as P
import           Data.ByteString as B
import           Data.ByteString.Internal as B (c2w, w2c)
import qualified Data.ByteString.Lazy as Lazy
#if MIN_VERSION_bytestring(0,10,0)
import qualified Data.ByteString.Lazy.Builder as B
import qualified Data.ByteString.Lazy.Builder.ASCII as B
#else
import qualified Data.ByteString.Builder as B
-- import qualified Data.ByteString.Builder.ASCII as B
-- import qualified Data.ByteString.Builder.Internal as B
#endif
import Data.Foldable
import Data.List as L
import Data.Monoid
import Text.PrettyPrint hiding ((<>))
import Data.Char
import Data.Maybe

import Data.BEncode.Types
import Data.BEncode.BDict as BD

import GHC.Types
import GHC.Integer.GMP.Internals

{--------------------------------------------------------------------
-- Serialization
--------------------------------------------------------------------}

integerDecimal :: Integer -> B.Builder
integerDecimal (S# i#) = B.intDec (I# i#)
integerDecimal  i      = B.string7 (show i) -- TODO more efficient

-- | BEncode format encoder according to specification.
builder :: BValue -> B.Builder
builder = go
    where
      go (BInteger i) = B.word8 (c2w 'i') <>
                          integerDecimal i <>
                        B.word8 (c2w 'e')
      go (BString  s) = buildString s
      go (BList    l) = B.word8 (c2w 'l') <>
                        foldMap go l <>
                        B.word8 (c2w 'e')
      go (BDict    d) = B.word8 (c2w 'd') <>
                        bifoldMap mkKV d <>
                        B.word8 (c2w 'e')
          where
            mkKV k v = buildString k <> go v

      buildString s = B.intDec (B.length s) <>
                      B.word8 (c2w ':') <>
                      B.byteString s
      {-# INLINE buildString #-}

-- | Convert bencoded value to raw bytestring according to the
-- specification.
build :: BValue -> Lazy.ByteString
build = B.toLazyByteString . builder


sample = "lld10:xOo@-ovQ\b}i8eeded12:]\ENQ89gJp\DC1Y\t!]17:\SOHRQ8\DLE\ESC\NULiUSRo\t.M<gei18eldede8:VX^ejm\SO_d12:.J*\DLEIc\SIV\ESCun\SOHdeede8:tg!\"lU\SOH\DEL1:Pe16:X\EMk\ESCGaek)\DC4'\t+\ESChhei-7e16:<OJ\v9\".i;\DC4[=]_D9de16:$-giCtwedm!\CAN\aA\DC3{dee"

{--------------------------------------------------------------------
-- Deserialization
--------------------------------------------------------------------}

-- TODO try to replace peekChar with something else
-- | BEncode format parser according to specification.
parser :: Parser BValue
parser = valueP Nothing
  where
    valueP prior = do
      mc <- maybe (optional P.anyChar) (return . Just) prior
      case mc of
        Nothing -> fail "end of input"
        Just c  ->
            case c of
              -- if we have digit it always should be string length
              di | '0' <= di && di <= '9' -> BString <$> stringP c
              'i' -> ((BInteger <$> integerP)  <* P.char 'e') --  P.anyChar)
              'l' -> ((BList    <$> listBodyP) ) -- <* P.anyChar)
              'd' ->  (BDict    <$> dictBodyP) -- <* P.anyChar
              t   -> fail ("bencode unknown tag: " ++ [t])

    dictBodyP :: Parser BDict
    dictBodyP =
            (P.char 'e' *> pure Nil)
            <|> do c <- P.satisfy isDigit -- P.anyChar
                   Cons <$> stringP c <*> valueP Nothing <*> dictBodyP

    listBodyP = do
      c <- optional P.anyChar
      case c of
        Just 'e' -> return []
        _        -> (:) <$> valueP c <*> listBodyP

    leadingDigit c zeros n0
        | n0==(-10) = d * 10^(fromIntegral (B.length zeros))
        | n0/=0     = d * 10^(fromIntegral (B.length zeros) + truncate (1+logBase 10 (fromIntegral n0))) + n0
        | otherwise = d * 10
      where d = fromIntegral $ ord c - 48

    stringP :: Char -> Parser ByteString
    stringP c = do
      zeros <- P.takeWhile (=='0')
      n0 <- (P.decimal <|> pure (-10)) :: Parser Int
      let n = leadingDigit c zeros n0
      P.char ':'
      P.take n
    {-# INLINE stringP #-}

    integerP :: Parser Integer
    integerP = do
      c <- optional P.anyChar
      case c of
        Just '-' -> do
          negate <$> P.decimal
        Just c'  ->  do zeros <- P.takeWhile (=='0')
                        leadingDigit c' zeros <$> (P.decimal <|> pure (-10))
        _        ->  P.decimal
    {-# INLINE integerP #-}

-- | Try to convert raw bytestring to bencoded value according to
-- specification.
parse :: ByteString -> Either String BValue
parse = P.parseOnly parser

{--------------------------------------------------------------------
  Pretty Printing
--------------------------------------------------------------------}

ppBS :: ByteString -> Doc
ppBS = text . L.map w2c . B.unpack

-- | Convert to easily readable JSON-like document. Typically used for
-- debugging purposes.
ppBEncode :: BValue -> Doc
ppBEncode (BInteger i) = int $ fromIntegral i
ppBEncode (BString  s) = ppBS s
ppBEncode (BList    l)
    = brackets $ hsep $ punctuate comma $ L.map ppBEncode l
ppBEncode (BDict    d)
    = braces $ vcat $ punctuate comma $ L.map ppKV $ BD.toAscList d
  where
    ppKV (k, v) = ppBS k <+> colon <+> ppBEncode v