blob: 1532fe6f021e8d0208dbbe5cac98c123bc5671d3 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
|
-- |
-- Copyright : (c) Sam Truzjan 2013
-- License : BSD3
-- Maintainer : pxqr.sta@gmail.com
-- Stability : stable
-- Portability : portable
--
-- This module provides bencode values serialization. Normally, you
-- don't need to import this module, use 'Data.BEncode' instead.
--
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE CPP #-}
module Data.BEncode.Internal
( -- * Parsing
parser
, parse
-- * Rendering
, builder
, build
, ppBEncode
) where
import Control.Applicative
import Data.Attoparsec.ByteString.Char8 (Parser)
import qualified Data.Attoparsec.ByteString.Char8 as P
import Data.ByteString as B
import Data.ByteString.Internal as B (c2w, w2c)
import qualified Data.ByteString.Lazy as Lazy
#if MIN_VERSION_bytestring(0,10,0)
import qualified Data.ByteString.Lazy.Builder as B
import qualified Data.ByteString.Lazy.Builder.ASCII as B
#else
import qualified Data.ByteString.Builder as B
-- import qualified Data.ByteString.Builder.ASCII as B
-- import qualified Data.ByteString.Builder.Internal as B
#endif
import Data.Foldable
import Data.List as L
import Data.Monoid
import Text.PrettyPrint hiding ((<>))
import Data.Char
import Data.Maybe
import Data.BEncode.Types
import Data.BEncode.BDict as BD
import GHC.Types
import GHC.Integer.GMP.Internals
{--------------------------------------------------------------------
-- Serialization
--------------------------------------------------------------------}
integerDecimal :: Integer -> B.Builder
integerDecimal (S# i#) = B.intDec (I# i#)
integerDecimal i = B.string7 (show i) -- TODO more efficient
-- | BEncode format encoder according to specification.
builder :: BValue -> B.Builder
builder = go
where
go (BInteger i) = B.word8 (c2w 'i') <>
integerDecimal i <>
B.word8 (c2w 'e')
go (BString s) = buildString s
go (BList l) = B.word8 (c2w 'l') <>
foldMap go l <>
B.word8 (c2w 'e')
go (BDict d) = B.word8 (c2w 'd') <>
bifoldMap mkKV d <>
B.word8 (c2w 'e')
where
mkKV k v = buildString k <> go v
buildString s = B.intDec (B.length s) <>
B.word8 (c2w ':') <>
B.byteString s
{-# INLINE buildString #-}
-- | Convert bencoded value to raw bytestring according to the
-- specification.
build :: BValue -> Lazy.ByteString
build = B.toLazyByteString . builder
sample = "lld10:xOo@-ovQ\b}i8eeded12:]\ENQ89gJp\DC1Y\t!]17:\SOHRQ8\DLE\ESC\NULiUSRo\t.M<gei18eldede8:VX^ejm\SO_d12:.J*\DLEIc\SIV\ESCun\SOHdeede8:tg!\"lU\SOH\DEL1:Pe16:X\EMk\ESCGaek)\DC4'\t+\ESChhei-7e16:<OJ\v9\".i;\DC4[=]_D9de16:$-giCtwedm!\CAN\aA\DC3{dee"
{--------------------------------------------------------------------
-- Deserialization
--------------------------------------------------------------------}
-- TODO try to replace peekChar with something else
-- | BEncode format parser according to specification.
parser :: Parser BValue
parser = valueP Nothing
where
valueP prior = do
mc <- maybe (optional P.anyChar) (return . Just) prior
case mc of
Nothing -> fail "end of input"
Just c ->
case c of
-- if we have digit it always should be string length
di | '0' <= di && di <= '9' -> BString <$> stringP c
'i' -> ((BInteger <$> integerP) <* P.char 'e') -- P.anyChar)
'l' -> ((BList <$> listBodyP) ) -- <* P.anyChar)
'd' -> (BDict <$> dictBodyP) -- <* P.anyChar
t -> fail ("bencode unknown tag: " ++ [t])
dictBodyP :: Parser BDict
dictBodyP =
(P.char 'e' *> pure Nil)
<|> do c <- P.satisfy isDigit -- P.anyChar
Cons <$> stringP c <*> valueP Nothing <*> dictBodyP
listBodyP = do
c <- optional P.anyChar
case c of
Just 'e' -> return []
_ -> (:) <$> valueP c <*> listBodyP
leadingDigit c zeros n0
| n0==(-10) = d * 10^(fromIntegral (B.length zeros))
| n0/=0 = d * 10^(fromIntegral (B.length zeros) + truncate (1+logBase 10 (fromIntegral n0))) + n0
| otherwise = d * 10
where d = fromIntegral $ ord c - 48
stringP :: Char -> Parser ByteString
stringP c = do
zeros <- P.takeWhile (=='0')
n0 <- (P.decimal <|> pure (-10)) :: Parser Int
let n = leadingDigit c zeros n0
P.char ':'
P.take n
{-# INLINE stringP #-}
integerP :: Parser Integer
integerP = do
c <- optional P.anyChar
case c of
Just '-' -> do
negate <$> P.decimal
Just c' -> do zeros <- P.takeWhile (=='0')
leadingDigit c' zeros <$> (P.decimal <|> pure (-10))
_ -> P.decimal
{-# INLINE integerP #-}
-- | Try to convert raw bytestring to bencoded value according to
-- specification.
parse :: ByteString -> Either String BValue
parse = P.parseOnly parser
{--------------------------------------------------------------------
Pretty Printing
--------------------------------------------------------------------}
ppBS :: ByteString -> Doc
ppBS = text . L.map w2c . B.unpack
-- | Convert to easily readable JSON-like document. Typically used for
-- debugging purposes.
ppBEncode :: BValue -> Doc
ppBEncode (BInteger i) = int $ fromIntegral i
ppBEncode (BString s) = ppBS s
ppBEncode (BList l)
= brackets $ hsep $ punctuate comma $ L.map ppBEncode l
ppBEncode (BDict d)
= braces $ vcat $ punctuate comma $ L.map ppKV $ BD.toAscList d
where
ppKV (k, v) = ppBS k <+> colon <+> ppBEncode v
|