diff options
Diffstat (limited to 'src/Codec/Binary/PythonString.hs')
-rw-r--r-- | src/Codec/Binary/PythonString.hs | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/src/Codec/Binary/PythonString.hs b/src/Codec/Binary/PythonString.hs new file mode 100644 index 0000000..eae4c51 --- /dev/null +++ b/src/Codec/Binary/PythonString.hs | |||
@@ -0,0 +1,109 @@ | |||
1 | -- | | ||
2 | -- Module : Codec.Binary.PythonString | ||
3 | -- Copyright : (c) 2009 Magnus Therning | ||
4 | -- License : BSD3 | ||
5 | -- | ||
6 | -- Implementation of python escaping. | ||
7 | -- | ||
8 | -- This implementation encodes non-printable characters (0x00-0x1f, 0x7f-0xff) | ||
9 | -- to hex-value characters ('\xhh') while leaving printable characters as such: | ||
10 | -- | ||
11 | -- @ | ||
12 | -- \> encode [0, 10, 13, 110] | ||
13 | -- \"\\\\x00\\\\x0A\\\\x0Dn\" | ||
14 | -- \> putStrLn $ encode [0, 10, 13, 110] | ||
15 | -- \\x00\\x0A\\x0Dn | ||
16 | -- @ | ||
17 | -- | ||
18 | -- It also properly handles escaping of a few characters that require it: | ||
19 | -- | ||
20 | -- @ | ||
21 | -- \> encode [34, 39, 92] | ||
22 | -- \"\\\\\\\"\\\\\'\\\\\\\\\" | ||
23 | -- putStrLn $ encode [34, 39, 92] | ||
24 | -- \\\"\\'\\\\ | ||
25 | -- @ | ||
26 | -- | ||
27 | -- Further documentation and information can be found at | ||
28 | -- <http://www.haskell.org/haskellwiki/Library/Data_encoding>. | ||
29 | module Codec.Binary.PythonString | ||
30 | ( EncIncData(..) | ||
31 | , EncIncRes(..) | ||
32 | , encodeInc | ||
33 | , encode | ||
34 | , DecIncData(..) | ||
35 | , DecIncRes(..) | ||
36 | , decodeInc | ||
37 | , decode | ||
38 | , chop | ||
39 | , unchop | ||
40 | ) where | ||
41 | |||
42 | import Codec.Binary.Util | ||
43 | |||
44 | import Data.Char | ||
45 | import Data.Maybe | ||
46 | import Data.Word | ||
47 | |||
48 | -- {{{1 encode | ||
49 | -- | Incremental encoder function. | ||
50 | encodeInc :: EncIncData -> EncIncRes String | ||
51 | encodeInc e = eI e | ||
52 | where | ||
53 | enc [] = [] | ||
54 | enc (o:os) | ||
55 | | o < 0x20 || o > 0x7e = ('\\' : 'x' : toHex o) ++ enc os | ||
56 | | o == 34 = "\\\"" ++ enc os | ||
57 | | o == 39 = "\\'" ++ enc os | ||
58 | | o == 92 = "\\\\" ++ enc os | ||
59 | | otherwise = chr (fromIntegral o) : enc os | ||
60 | |||
61 | eI EDone = EFinal [] | ||
62 | eI (EChunk bs) = EPart (enc bs) encodeInc | ||
63 | |||
64 | -- | Encode data. | ||
65 | encode :: [Word8] -> String | ||
66 | encode = encoder encodeInc | ||
67 | |||
68 | -- {{{1 decode | ||
69 | -- | Incremental decoder function. | ||
70 | decodeInc :: DecIncData String -> DecIncRes String | ||
71 | decodeInc d = dI [] d | ||
72 | where | ||
73 | dI [] DDone = DFinal [] [] | ||
74 | dI lo DDone = DFail [] lo | ||
75 | dI lo (DChunk s) = doDec [] (lo ++ s) | ||
76 | where | ||
77 | doDec acc [] = DPart acc (dI []) | ||
78 | doDec acc s'@('\\':'x':c0:c1:cs) = let | ||
79 | o = fromHex [c0, c1] | ||
80 | in if isJust o | ||
81 | then doDec (acc ++ [fromJust o]) cs | ||
82 | else DFail acc s' | ||
83 | doDec acc s'@('\\':'\\':cs) = doDec (acc ++ [fromIntegral $ ord '\\']) cs | ||
84 | doDec acc s'@('\\':'\'':cs) = doDec (acc ++ [fromIntegral $ ord '\'']) cs | ||
85 | doDec acc s'@('\\':'\"':cs) = doDec (acc ++ [fromIntegral $ ord '\"']) cs | ||
86 | doDec acc s'@(c:cs) | ||
87 | | c /= '\\' = doDec (acc ++ [fromIntegral $ ord c]) cs | ||
88 | | otherwise = DPart acc (dI s') | ||
89 | |||
90 | -- | Decode data. | ||
91 | decode :: String -> Maybe [Word8] | ||
92 | decode = decoder decodeInc | ||
93 | |||
94 | -- {{{1 chop | ||
95 | -- | Chop up a string in parts. | ||
96 | chop :: Int -- ^ length of individual lines (values @\< 1@ are ignored) | ||
97 | -> String | ||
98 | -> [String] | ||
99 | chop n = let | ||
100 | _n = max 1 n | ||
101 | _chop [] = [] | ||
102 | _chop cs = take _n cs : _chop (drop _n cs) | ||
103 | in _chop | ||
104 | |||
105 | -- {{{1 unchop | ||
106 | -- | Concatenate the list of strings into one long string. | ||
107 | unchop :: [String] | ||
108 | -> String | ||
109 | unchop = foldr (++) "" | ||