1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
-- |
-- Module : Codec.Binary.PythonString
-- Copyright : (c) 2009 Magnus Therning
-- License : BSD3
--
-- Implementation of Python string escaping.
--
-- This implementation encodes non-printable characters (0x00-0x1f, 0x7f-0xff)
-- to hex-value characters ('\xhh') while leaving printable characters as such:
--
-- @
-- \> encode [0, 10, 13, 110]
-- \"\\\\x00\\\\x0A\\\\x0Dn\"
-- \> putStrLn $ encode [0, 10, 13, 110]
-- \\x00\\x0A\\x0Dn
-- @
--
-- It also properly handles escaping of a few characters that require it:
--
-- @
-- \> encode [34, 39, 92]
-- \"\\\\\\\"\\\\\'\\\\\\\\\"
-- \> putStrLn $ encode [34, 39, 92]
-- \\\"\\'\\\\
-- @
--
-- Further documentation and information can be found at
-- <http://www.haskell.org/haskellwiki/Library/Data_encoding>.
module Codec.Binary.PythonString
( EncIncData(..)
, EncIncRes(..)
, encodeInc
, encode
, DecIncData(..)
, DecIncRes(..)
, decodeInc
, decode
, chop
, unchop
) where
import Codec.Binary.Util
import Data.Char
import Data.Maybe
import Data.Word
-- {{{1 encode
-- | Incremental encoder function.
--
-- Feeding an 'EChunk' of bytes yields an 'EPart' carrying the escaped text
-- for that chunk together with 'encodeInc' itself as the continuation for the
-- next piece of input.  Feeding 'EDone' yields an 'EFinal' with no output.
encodeInc :: EncIncData -> EncIncRes String
encodeInc EDone = EFinal []
encodeInc (EChunk octets) = EPart (concatMap escape octets) encodeInc
  where
    -- Escape a single byte.  The three special characters all lie inside the
    -- printable range, so testing them first does not change the outcome.
    escape w
      | w == 34 = "\\\""                              -- double quote
      | w == 39 = "\\'"                               -- single quote
      | w == 92 = "\\\\"                              -- backslash
      | w < 0x20 || w > 0x7e = '\\' : 'x' : toHex w   -- non-printable
      | otherwise = [chr (fromIntegral w)]            -- printable, kept as is
-- | Encode data.
--
-- Drives the incremental encoder 'encodeInc' over the complete input by
-- handing it to the 'encoder' helper.
encode :: [Word8] -> String
encode octets = encoder encodeInc octets
-- {{{1 decode
-- | Incremental decoder function.
--
-- Each 'DChunk' is decoded after prepending whatever input the previous step
-- could not consume.  Decoding a chunk yields one of:
--
-- * 'DPart' with the bytes decoded from this chunk and a continuation, when
--   the input was consumed entirely or decoding stopped at a backslash that
--   does not (yet) start a recognised escape; in the latter case the
--   unconsumed tail is carried into the continuation.
--
-- * 'DFail' with the bytes decoded from this chunk and the remaining input,
--   when 'fromHex' rejects the two characters following a @\\x@.
--
-- 'DDone' yields 'DFinal' when no input is left over, and 'DFail' with the
-- left-over input otherwise.
decodeInc :: DecIncData String -> DecIncRes String
decodeInc = dI []
  where
    -- @lo@ is the left-over input from the previous chunk.
    dI [] DDone = DFinal [] []
    dI lo DDone = DFail [] lo
    dI lo (DChunk s) = doDec [] (lo ++ s)

    -- NOTE: 'fromIntegral' wraps, so characters above U+00FF are reduced
    -- modulo 256 rather than rejected.
    byte :: Char -> Word8
    byte = fromIntegral . ord

    -- The accumulator holds the decoded bytes in reverse order so that each
    -- step is a constant-time cons; it is reversed once when a result is
    -- produced.
    doDec racc [] = DPart (reverse racc) (dI [])
    doDec racc s'@('\\':'x':c0:c1:cs) =
      case fromHex [c0, c1] of
        Just o -> doDec (o : racc) cs
        Nothing -> DFail (reverse racc) s'
    doDec racc ('\\':'\\':cs) = doDec (byte '\\' : racc) cs
    doDec racc ('\\':'\'':cs) = doDec (byte '\'' : racc) cs
    doDec racc ('\\':'"':cs) = doDec (byte '"' : racc) cs
    doDec racc s'@(c:cs)
      | c /= '\\' = doDec (byte c : racc) cs
      -- A backslash that matched none of the escapes above: stop here and
      -- keep the tail as left-over for the next call.
      | otherwise = DPart (reverse racc) (dI s')
-- | Decode data.
--
-- Drives the incremental decoder 'decodeInc' over the complete input by
-- handing it to the 'decoder' helper.
decode :: String -> Maybe [Word8]
decode text = decoder decodeInc text
-- {{{1 chop
-- | Chop up a string in parts.
--
-- Every part has the requested length, except possibly the last one, which
-- holds whatever remains.  An empty string gives an empty list.
chop :: Int -- ^ length of individual lines (values @\< 1@ are treated as 1)
     -> String
     -> [String]
chop n = pieces
  where
    -- Clamp the width so that every step consumes at least one character.
    width = max 1 n

    pieces [] = []
    pieces cs =
      let (line, rest) = splitAt width cs
      in line : pieces rest
-- {{{1 unchop
-- | Concatenate the list of strings into one long string.
--
-- The parts are joined in order with nothing inserted between them.
unchop :: [String]
       -> String
unchop parts = concat parts
|