summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam T <pxqr.sta@gmail.com>2013-09-26 20:58:11 +0400
committerSam T <pxqr.sta@gmail.com>2013-09-26 20:58:11 +0400
commit4756424235c5222a832e1967d54c5ddb369f6fbf (patch)
treee5683c8d3cf0fbdb04cea29d38ead37aa97a2b8f
Initial commit
-rw-r--r--.gitignore2
-rw-r--r--.travis.yml11
-rw-r--r--LICENSE30
-rw-r--r--README.md22
-rw-r--r--Setup.hs2
-rw-r--r--TODO.org1
-rw-r--r--base32-bytestring.cabal61
-rw-r--r--bench/Main.hs18
-rw-r--r--src/Data/ByteString/Base32.hibin0 -> 1805 bytes
-rw-r--r--src/Data/ByteString/Base32.hs269
-rw-r--r--src/Data/ByteString/Base32.obin0 -> 55200 bytes
-rw-r--r--tests/Data/ByteString/Base32Spec.hs46
-rw-r--r--tests/Spec.hs1
13 files changed, 463 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..316009b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
1dist
2cabal-dev
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..fd78ba7
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,11 @@
1language: haskell
2
3notifications:
4 email:
5 - pxqr.sta@gmail.com
6
7install:
8 cabal install --enable-tests --enable-benchmark --only-dependencies
9
10script:
11 cabal configure --enable-tests --enable-benchmark && cabal build && cabal test \ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..4c30139
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,30 @@
1Copyright (c) 2013, Sam Truzjan
2
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above
12 copyright notice, this list of conditions and the following
13 disclaimer in the documentation and/or other materials provided
14 with the distribution.
15
16 * Neither the name of Sam Truzjan nor the names of other
17 contributors may be used to endorse or promote products derived
18 from this software without specific prior written permission.
19
20THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..83da005
--- /dev/null
+++ b/README.md
@@ -0,0 +1,22 @@
1`base32-bytestring` is efficient [base32][rfc] codec for bytestrings.
2The API is similar to [base64-bytestring][base64-pkg] package.
3
4### Performance
5
6| function | MB/sec |
7|:---------------:|:---------------:|
8|encoding | 400 |
9|decoding | 400 |
10|lenient decoding | N/A |
11
12### Build Status [![Build Status][travis-img]][travis-log]
13
14### Maintainer <pxqr.sta@gmail.com>
15
16You can report any issues at [Issue tracker][issues].
17
18[base64-pkg]: http://hackage.haskell.org/package/base64-bytestring-1.0.0.1
19[rfc]: http://tools.ietf.org/html/rfc4648
20[travis-img]: https://travis-ci.org/cobit/base32-bytestring.png
21[travis-log]: https://travis-ci.org/cobit/base32-bytestring
22[issues]: https://github.com/cobit/base32-bytestring/issues
diff --git a/Setup.hs b/Setup.hs
new file mode 100644
index 0000000..9a994af
--- /dev/null
+++ b/Setup.hs
@@ -0,0 +1,2 @@
1import Distribution.Simple
2main = defaultMain
diff --git a/TODO.org b/TODO.org
new file mode 100644
index 0000000..7b127bd
--- /dev/null
+++ b/TODO.org
@@ -0,0 +1 @@
* TODO lazy bytestrings
diff --git a/base32-bytestring.cabal b/base32-bytestring.cabal
new file mode 100644
index 0000000..ccba67e
--- /dev/null
+++ b/base32-bytestring.cabal
@@ -0,0 +1,61 @@
1name: base32-bytestring
2version: 0.0.0.0
3license: BSD3
4license-file: LICENSE
5author: Sam Truzjan
6maintainer: Sam Truzjan <pxqr.sta@gmail.com>
7copyright: (c) 2013 Sam Truzjan
8category: Codec, Data
9build-type: Simple
10cabal-version: >= 1.10
11synopsis: Fast base32 encoding and decoding for ByteStrings
12description:
13 Base32 encoding according to RFC4648
14 <http://tools.ietf.org/html/rfc4648>
15 This package have API similar to base64-bytestring.
16
17source-repository head
18 type: git
19 location: git://github.com/cobit/base32.git
20 branch: master
21
22source-repository this
23 type: git
24 location: git://github.com/cobit/base32.git
25 branch: master
26 tag: v0.0.0.0
27
28library
29 default-language: Haskell2010
30 default-extensions:
31 hs-source-dirs: src
32 exposed-modules: Data.ByteString.Base32
33 build-depends: base == 4.6.*
34 , bytestring == 0.10.*
35 , cpu
36 , bits-extras
37 ghc-options: -O2 -Wall
38
39test-suite spec
40 default-language: Haskell2010
41 default-extensions: OverloadedStrings
42 type: exitcode-stdio-1.0
43 hs-source-dirs: tests
44 main-is: Spec.hs
45 build-depends: base == 4.*
46 , base32-bytestring
47 , bytestring
48 , hspec >= 1.7
49 , QuickCheck
50 ghc-options: -Wall
51
52benchmark bench
53 type: exitcode-stdio-1.0
54 default-language: Haskell2010
55 hs-source-dirs: bench
56 main-is: Main.hs
57 build-depends: base == 4.*
58 , base32-bytestring
59 , bytestring
60 , criterion
61 ghc-options: -O2 -Wall -fno-warn-orphans
diff --git a/bench/Main.hs b/bench/Main.hs
new file mode 100644
index 0000000..5309cd9
--- /dev/null
+++ b/bench/Main.hs
@@ -0,0 +1,18 @@
1module Main (main) where
2
3import Criterion.Main
4import Data.ByteString as BS
5import Data.ByteString.Base32 as Base32
6
7
8main :: IO ()
9main = defaultMain
10 [ bench "encode/1M" $ nf encode $ BS.replicate 1000000 0x8e
11 , bench "encode/5M" $ nf encode $ BS.replicate 5000000 0x8e
12
13 , bench "decode/regular/1M" $ nf decode $ BS.replicate 1000000 0x41
14 , bench "decode/regular/5M" $ nf decode $ BS.replicate 5000000 0x41
15
16 , bench "decode/lenient/1M" $ nf decodeLenient $ BS.replicate 1000000 0x41
17 , bench "decode/lenient/5M" $ nf decodeLenient $ BS.replicate 5000000 0x41
18 ] \ No newline at end of file
diff --git a/src/Data/ByteString/Base32.hi b/src/Data/ByteString/Base32.hi
new file mode 100644
index 0000000..ce22b69
--- /dev/null
+++ b/src/Data/ByteString/Base32.hi
Binary files differ
diff --git a/src/Data/ByteString/Base32.hs b/src/Data/ByteString/Base32.hs
new file mode 100644
index 0000000..96bb893
--- /dev/null
+++ b/src/Data/ByteString/Base32.hs
@@ -0,0 +1,269 @@
1-- |
2-- Copyright : (c) Sam Truzjan 2013
3-- License : BSD3
4-- Maintainer : pxqr.sta@gmail.com
5-- Stability : stable
6-- Portability : portable
7--
8-- Efficient encoding and decoding of base32 encoded bytestring
9-- according to RFC 4648. <http://tools.ietf.org/html/rfc4648>
10--
11-- This module recommended to be imported as
12-- @import Data.ByteString.Base32 as Base32@ to avoid name clashes
13-- with @Data.Binary@ or @Data.ByteString.Base64@ modules.
14--
15{-# LANGUAGE BangPatterns #-}
16module Data.ByteString.Base32
17 ( encode
18 , decode
19 , decodeLenient
20 ) where
21
22import Data.Bits.Extras
23import Data.ByteString as BS
24import Data.ByteString.Internal as BS
25import Data.Word
26import Foreign hiding (unsafePerformIO)
27import System.IO.Unsafe (unsafePerformIO)
28import System.Endian
29
30
31{-----------------------------------------------------------------------
32-- Utils
33-----------------------------------------------------------------------}
34
35type Word5 = Word8
36
37-- System.Endian.toBE32 is slower because toBE32 implemented using
38-- cbits shuffle functions while toBE32' implemented used gcc
39-- intrinsics
40--
41toBE64' :: Word64 -> Word64
42toBE64' = if getSystemEndianness == BigEndian then id else byteSwap
43{-# INLINE toBE64' #-}
44
45toBE32' :: Word32 -> Word32
46toBE32' = if getSystemEndianness == BigEndian then id else byteSwap
47{-# INLINE toBE32' #-}
48
49fromBE32' :: Word32 -> Word32
50fromBE32' = toBE32'
51{-# INLINE fromBE32' #-}
52
53-- n = 2 ^ d
54padCeilN :: Int -> Int -> Int
55padCeilN !n !x
56 | remd == 0 = x
57 | otherwise = (x - remd) + n
58 where mask = n - 1
59 remd = x .&. mask
60
61{-----------------------------------------------------------------------
62-- Encoding
63-----------------------------------------------------------------------}
64
65type EncTable = Ptr Word8
66
67unpack5 :: EncTable -> ByteString -> ByteString
68unpack5 !tbl bs @ (PS fptr off sz) =
69 unsafePerformIO $ do
70 let unpackedSize = dstSize $ BS.length bs
71 BS.create unpackedSize $ \ dst -> do
72 withForeignPtr fptr $ \ ptr -> do
73 dst_end <- bigStep dst (advancePtr ptr off) sz
74 _ <- fillPadding dst_end (unpackedSize - (dst_end `minusPtr` dst))
75 return ()
76 where
77 dstSize x = padCeilN 8 (d + if m == 0 then 0 else 1)
78 where (d, m) = (x * 8) `quotRem` 5
79
80 fillPadding dst s = memset dst (c2w '=') (fromIntegral s)
81
82 bigStep !dst !src !s
83 | s >= 5 = do
84 unpack5_40 dst src
85 bigStep (dst `advancePtr` 8) (src `advancePtr` 5) (s - 5)
86 | otherwise = smallStep dst src s 0 0
87
88 unpack5_40 !dst !src = do
89 w32he <- peek (castPtr src) :: IO Word32
90 let w32 = toBE32' w32he
91 fill8_32 0 (w32 `unsafeShiftR` 27)
92 fill8_32 1 (w32 `unsafeShiftR` 22)
93 fill8_32 2 (w32 `unsafeShiftR` 17)
94 fill8_32 3 (w32 `unsafeShiftR` 12)
95 fill8_32 4 (w32 `unsafeShiftR` 7)
96 fill8_32 5 (w32 `unsafeShiftR` 2)
97
98 w8 <- peekElemOff src 4
99 fill8_32 6 ( (w32 `unsafeShiftL` 3)
100 .|. fromIntegral (w8 `unsafeShiftR` 5))
101 fill8_32 7 (fromIntegral w8)
102 where
103 fill8_32 :: Int -> Word32 -> IO ()
104 fill8_32 !i !w32 = do
105 w8 <- peekByteOff tbl (fromIntegral w32 .&. 0x1f)
106 poke (dst `advancePtr` i) w8
107
108 smallStep !dst !src !s !unused !un_cnt
109 | un_cnt >= 5 = do
110 let ix = unused `unsafeShiftR` 3
111 peekByteOff tbl (fromIntegral ix) >>= poke dst
112 smallStep (advancePtr dst 1)
113 src s
114 (unused `unsafeShiftL` 5)
115 (un_cnt - 5)
116
117 | s == 0 = do
118 if un_cnt == 0
119 then return dst
120 else do
121 let ix = unused `unsafeShiftR` 3
122 peekByteOff tbl (fromIntegral ix) >>= poke dst
123 return (dst `advancePtr` 1)
124
125 | otherwise = do
126 w8 <- peek src
127 let usd_cnt = 5 - un_cnt
128 let bits = w8 .&. complement (bit (8 - usd_cnt) - 1)
129 let ix = (unused .|. bits `shiftR` un_cnt) `unsafeShiftR` 3
130 peekByteOff tbl (fromIntegral ix) >>= poke dst
131 smallStep (advancePtr dst 1)
132 (advancePtr src 1) (pred s)
133 (w8 `shiftL` usd_cnt) (8 - usd_cnt)
134
135encW5 :: Word5 -> Word8
136encW5 !x
137 | x <= 25 = 65 + x
138 | otherwise = 24 + x
139{-# INLINE encW5 #-}
140
141encTable :: ForeignPtr Word8
142PS encTable _ _ = BS.pack $ fmap encW5 [0..31]
143
144-- | Encode a bytestring into base32 form.
145encode :: ByteString -> ByteString
146encode bs =
147 unsafePerformIO $ do
148 withForeignPtr encTable $ \ptr -> do
149 return $ unpack5 ptr bs
150
151{-----------------------------------------------------------------------
152-- Decoding
153-----------------------------------------------------------------------}
154
155type DecTable = Ptr Word5
156
157pack5 :: DecTable -> (Word8 -> Word5) -> ByteString -> ByteString
158pack5 !tbl !f bs @ (PS fptr off sz) =
159 unsafePerformIO $ do
160 let packedSize = dstSize $ BS.length bs
161 BS.createAndTrim packedSize $ \ dst -> do
162 withForeignPtr fptr $ \ ptr -> do
163 dst_end <- bigStep dst (advancePtr ptr off) sz
164 return (dst_end `minusPtr` dst)
165 where
166 lookupTable :: Word8 -> Word5
167 lookupTable ix
168 | x == invIx = error $ "base32: decode: invalid character" ++ show ix
169 | otherwise = x
170 where x = inlinePerformIO (peekByteOff tbl (fromIntegral ix))
171 {-# INLINE lookupTable #-}
172
173 dstSize x = d + if m == 0 then 0 else 1
174 where (d, m) = (x * 5) `quotRem` 8
175
176 bigStep !dst !src !s
177 | s > 8 = do
178 pack5_40 dst src
179 bigStep (dst `advancePtr` 5) (src `advancePtr` 8) (s - 8)
180 | otherwise = smallStep dst src s (0 :: Word64) 0
181
182 pack5_40 !dst !src = do
183 w64he <- peek (castPtr src) :: IO Word64
184 let w64 = toBE64' w64he
185 let w40 = putAsW5 (w64 `unsafeShiftR` 00) $
186 putAsW5 (w64 `unsafeShiftR` 08) $
187 putAsW5 (w64 `unsafeShiftR` 16) $
188 putAsW5 (w64 `unsafeShiftR` 24) $
189 putAsW5 (w64 `unsafeShiftR` 32) $
190 putAsW5 (w64 `unsafeShiftR` 40) $
191 putAsW5 (w64 `unsafeShiftR` 48) $
192 putAsW5 (w64 `unsafeShiftR` 56) 0
193 pokeW40 w40
194 where
195 putAsW5 :: Word64 -> Word64 -> Word64
196 {-# INLINE putAsW5 #-}
197 putAsW5 !w8 !acc = (acc `unsafeShiftL` 5)
198 .|. fromIntegral (lookupTable (fromIntegral w8))
199
200 pokeW40 :: Word64 -> IO ()
201 {-# INLINE pokeW40 #-}
202 pokeW40 !w40 = do
203 poke dst (fromIntegral (w40 `unsafeShiftR` 32) :: Word8)
204 poke (castPtr (dst `advancePtr` 1))
205 (fromBE32' (fromIntegral w40 :: Word32))
206
207 smallStep !dst !src !s !unused !un_cnt
208 | un_cnt >= 8 = do
209 poke dst $ fromIntegral (unused `unsafeShiftR` (un_cnt - 8))
210 smallStep (dst `advancePtr` 1) src s unused (un_cnt - 8)
211
212 | s == 0 = return dst
213 | otherwise = do
214 w8 <- peek src
215 if w2c w8 == '='
216 then if (bit un_cnt - 1) .&. unused == 0
217 then smallStep dst src 0 0 0
218 else smallStep dst src 0 (unused `shiftL` (8 - un_cnt)) 8
219 else smallStep dst
220 (src `advancePtr` 1) (pred s)
221 ((unused `unsafeShiftL` 5) .|. fromIntegral (f w8))
222 (un_cnt + 5)
223
224invIx :: Word5
225invIx = 255
226
227decW5 :: Word8 -> Word5
228decW5 !x
229 | x < 50 {- c2w '2' -} = invIx
230 | x <= 55 {- c2w '7' -} = x - 24
231 | x < 65 {- c2w 'A' -} = invIx
232 | x <= 90 {- c2w 'Z' -} = x - 65
233 | otherwise = invIx
234{-# INLINE decW5 #-}
235
236decTable :: ForeignPtr Word8
237PS decTable _ _ = BS.pack $ fmap decW5 [minBound .. maxBound]
238
239-- | Decode a base32 encoded bytestring.
240decode :: ByteString -> ByteString
241decode bs =
242 unsafePerformIO $ do
243 withForeignPtr decTable $ \tbl ->
244 return $ pack5 tbl decW5 bs
245
246{-----------------------------------------------------------------------
247-- Lenient Decoding
248-----------------------------------------------------------------------}
249
250decCharLenient :: Char -> Word5
251decCharLenient x
252 | x < '2' = err
253 | x <= '7' = 26 + fromIntegral (fromEnum x) - fromIntegral (fromEnum '2')
254 | x < 'A' = err
255 | x <= 'Z' = fromIntegral (fromEnum x) - fromIntegral (fromEnum 'A')
256 | x < 'a' = err
257 | x <= 'z' = fromIntegral (fromEnum x) - fromIntegral (fromEnum 'a')
258 | otherwise = err
259 where
260 err = error "base32: decodeChar: out of range"
261
262decW5Lenient :: Word8 -> Word5
263decW5Lenient = decCharLenient . w2c
264{-# INLINE decW5Lenient #-}
265
266-- TODO padding leniency
267-- | Case-insensitive counterpart of the 'decode'.
268decodeLenient :: ByteString -> ByteString
269decodeLenient = id -- pack5 nullPtr decW5Lenient \ No newline at end of file
diff --git a/src/Data/ByteString/Base32.o b/src/Data/ByteString/Base32.o
new file mode 100644
index 0000000..eadc3fe
--- /dev/null
+++ b/src/Data/ByteString/Base32.o
Binary files differ
diff --git a/tests/Data/ByteString/Base32Spec.hs b/tests/Data/ByteString/Base32Spec.hs
new file mode 100644
index 0000000..3c646e2
--- /dev/null
+++ b/tests/Data/ByteString/Base32Spec.hs
@@ -0,0 +1,46 @@
1{-# OPTIONS -fno-warn-orphans #-}
2module Data.ByteString.Base32Spec (spec) where
3
4import Control.Applicative
5import Data.ByteString as BS
6import Data.ByteString.Internal as BS
7import Data.ByteString.Base32 as Base32
8import Test.Hspec
9import Test.QuickCheck
10
11
12instance Arbitrary ByteString where
13 arbitrary = BS.pack <$> arbitrary
14
15spec :: Spec
16spec = do
17 describe "encode" $ do
18 it "conform RFC examples" $ do
19 encode "" `shouldBe` ""
20 encode "f" `shouldBe` "MY======"
21 encode "fo" `shouldBe` "MZXQ===="
22 encode "foo" `shouldBe` "MZXW6==="
23 encode "foob" `shouldBe` "MZXW6YQ="
24 encode "fooba" `shouldBe` "MZXW6YTB"
25 encode "foobar" `shouldBe` "MZXW6YTBOI======"
26
27 it "size always multiple of 8 bytes" $ property $ \bs ->
28 (BS.length (encode bs) `rem` 8) `shouldBe` 0
29
30 it "padding less than" $ property $ \bs ->
31 count (c2w '=') bs `shouldSatisfy` (< 8)
32
33 describe "decode" $ do
34 it "conform RFC examples" $ do
35 decode "" `shouldBe` ""
36 decode "MY======" `shouldBe` "f"
37 decode "MZXQ====" `shouldBe` "fo"
38 decode "MZXW6===" `shouldBe` "foo"
39 decode "MZXW6YQ=" `shouldBe` "foob"
40 decode "MZXW6YTB" `shouldBe` "fooba"
41 decode "MZXW6YTBOI======" `shouldBe` "foobar"
42
43 it "inverse for encode" $ property $ \bs ->
44 decode (encode bs) == bs
45
46-- describe "decodeLenient" $ do
diff --git a/tests/Spec.hs b/tests/Spec.hs
new file mode 100644
index 0000000..52ef578
--- /dev/null
+++ b/tests/Spec.hs
@@ -0,0 +1 @@
{-# OPTIONS_GHC -F -pgmF hspec-discover #-} \ No newline at end of file