summaryrefslogtreecommitdiff
path: root/src/Data/Bitfield.hs
diff options
context:
space:
mode:
authorSam T <pxqr.sta@gmail.com>2013-06-02 05:01:46 +0400
committerSam T <pxqr.sta@gmail.com>2013-06-02 05:01:46 +0400
commit01f51b51af8a67516238bc7264079601a7e2ece5 (patch)
tree13b346ad0ba930e7d964a3de9988365f6cc6ba4f /src/Data/Bitfield.hs
parent5e92eec501e0a1ca6d09a01e078cf54ff3277273 (diff)
~ Use IntSet instead of ByteString for bitfields.
There are several reasons for this: * IntSet is stored in ordinary heap, while ByteStrings in pinned memory; * Our IntSet's should be much faster 90% time. (in typical BT client) Hovewer in worst case IntSet is slower, but difference should is not so big. (We should measure this although) * It's pure, tested, error-free and much more convenient. Moreover we have kill a lot of ugly code!
Diffstat (limited to 'src/Data/Bitfield.hs')
-rw-r--r--src/Data/Bitfield.hs443
1 files changed, 149 insertions, 294 deletions
diff --git a/src/Data/Bitfield.hs b/src/Data/Bitfield.hs
index f7240c8a..546c68e9 100644
--- a/src/Data/Bitfield.hs
+++ b/src/Data/Bitfield.hs
@@ -5,341 +5,196 @@
5-- Stability : experimental 5-- Stability : experimental
6-- Portability : portable 6-- Portability : portable
7-- 7--
8-- 8-- This modules provides all necessary machinery to work with
9-- This module provides Bitfield datatype used to represent sets of 9-- bitfields. Bitfields are used to keep track indices of complete
10-- piece indexes any peer have. All associated operations should be 10-- pieces either peer have or client have.
11-- defined here as well.
12-- 11--
13{-# LANGUAGE BangPatterns #-} 12{-# LANGUAGE BangPatterns #-}
14{-# LANGUAGE RankNTypes #-} 13{-# LANGUAGE RecordWildCards #-}
15module Data.Bitfield 14module Data.Bitfield
16-- TODO: move to Data.Bitfield 15 ( Bitfield, PieceCount
17 ( Bitfield(..)
18 16
19 -- * Construction 17 -- * Construction
20 , empty, full 18 , empty
19 , insert
20 , haveAll, haveNone, have
21 21
22 -- * Query 22 -- * Query
23 , bitfieldByteCount, bitfieldBitCount 23 , haveCount, totalCount, completeness
24
25 , haveCount, completeness
26 , findMin, findMax 24 , findMin, findMax
27 , union, intersection, difference, combine 25 , frequencies, rarest
28 , frequencies 26
27 -- * Combine
28 , union
29 , intersection
30 , difference
29 31
30 -- * Serialization 32 -- * Serialization
31 , getBitfield, putBitfield 33 , getBitfield, putBitfield
34 , bitfieldByteCount
32 35
33 -- * Conversion 36 , -- * Debug
34 , toList 37 mkBitfield
35 , fromByteString, toByteString
36
37 -- * Debug
38 , aligned, alignLow, alignedZip
39 ) where 38 ) where
40 39
41import Control.Applicative hiding (empty) 40import Control.Monad
42import Data.Bits 41import Control.Monad.ST
43import Data.ByteString (ByteString) 42import Data.Vector.Unboxed (Vector)
44import qualified Data.ByteString as B 43import qualified Data.Vector.Unboxed as V
45import qualified Data.ByteString.Internal as B 44import qualified Data.Vector.Unboxed.Mutable as VM
46import Data.List as L hiding (union) 45import Data.IntervalSet (IntSet)
47import Data.Serialize 46import qualified Data.IntervalSet as S
48import Data.Word 47import Data.List (foldl')
48import Data.Monoid
49import Data.Ratio
50import Data.Serialize
51import Network.BitTorrent.PeerWire.Block
49 52
50import Foreign
51 53
52--import Network.BitTorrent.PeerWire.Block 54type PieceCount = Int
53import Data.Torrent
54 55
55-- TODO: one good idea is to aggregate frequently used stats in reducer 56-- TODO cache some operations
56-- it should give a big boost
57newtype Bitfield = MkBitfield {
58 bfBits :: ByteString
59-- , bfSize :: Int
60 } deriving (Show, Eq, Ord)
61 57
58-- | Bitfields are represented just as integer sets but with
59-- restriction: the each set should be within given interval (or
60-- subset of). Size is used to specify interval, so bitfield of size
61-- 10 might contain only indices in interval [0..9].
62--
63data Bitfield = Bitfield {
64 bfSize :: !PieceCount
65 , bfSet :: !IntSet
66 } deriving (Show, Read, Eq)
62 67
63empty :: Int -> Bitfield 68-- Invariants: all elements of bfSet lie in [0..bfSize - 1];
64empty n = MkBitfield $ B.replicate (sizeInBase n 8) 0
65{-# INLINE empty #-}
66 69
67full :: Int -> Bitfield 70instance Monoid Bitfield where
68full n = MkBitfield $ B.replicate (sizeInBase n 8) (complement 0) 71 {-# SPECIALIZE instance Monoid Bitfield #-}
69{-# INLINE full #-} 72 mempty = empty 0
73 mappend = union
74 mconcat = unions
70 75
71toList :: Bitfield -> [Bool] 76-- TODO documentation
72toList (MkBitfield bs) = concatMap unpkg (B.unpack bs) 77{-----------------------------------------------------------------------
73 where 78 Construction
74 unpkg :: Word8 -> [Bool] 79-----------------------------------------------------------------------}
75 unpkg byte = L.map (testBit byte) [0..bitSize (undefined :: Word8) - 1]
76{-# INLINE toList #-}
77 80
78fromByteString :: ByteString -> Bitfield 81empty :: PieceCount -> Bitfield
79fromByteString = MkBitfield 82empty s = Bitfield s S.empty
80{-# INLINE fromByteString #-}
81 83
82toByteString :: Bitfield -> ByteString 84insert :: PieceIx -> Bitfield -> Bitfield
83toByteString = bfBits 85insert ix Bitfield {..}
84{-# INLINE toByteString #-} 86 | 0 <= ix && ix < bfSize = Bitfield bfSize (S.insert ix bfSet)
87 | otherwise = Bitfield bfSize bfSet
85 88
86getBitfield :: Int -> Get Bitfield 89haveNone :: PieceCount -> Bitfield
87getBitfield n = MkBitfield <$> getBytes n 90haveNone = empty
88{-# INLINE getBitfield #-}
89 91
90putBitfield :: Bitfield -> Put 92haveAll :: PieceCount -> Bitfield
91putBitfield = putByteString . bfBits 93haveAll s = Bitfield s (S.interval 0 (s - 1))
92{-# INLINE putBitfield #-}
93 94
94bitfieldByteCount :: Bitfield -> Int 95have :: PieceIx -> Bitfield -> Bitfield
95bitfieldByteCount = B.length . bfBits 96have = insert
96{-# INLINE bitfieldByteCount #-}
97 97
98-- WARN 98{-----------------------------------------------------------------------
99-- TODO 99 Query
100bitfieldBitCount :: Bitfield -> Int 100-----------------------------------------------------------------------}
101bitfieldBitCount bf = bitSize (undefined :: Word8) * bitfieldByteCount bf
102{-# INLINE bitfieldBitCount #-}
103 101
104align :: Storable a => Ptr a -> (Ptr a, Int) 102haveCount :: Bitfield -> PieceCount
105align p = tie (alignPtr p) undefined 103haveCount = S.size . bfSet
106 where 104
107 tie :: Storable a => (Int -> Ptr a) -> a -> (Ptr a, Int) 105totalCount :: Bitfield -> PieceCount
108 tie f a = (f (alignment a), (alignment a)) 106totalCount = bfSize
109
110alignLow :: Ptr Word8 -> Ptr Word
111alignLow ptr =
112 let alg = alignment (undefined :: Word)
113 aptr = alignPtr (castPtr ptr) alg :: Ptr Word
114 in
115 if ptr == castPtr aptr
116 then aptr
117 else castPtr ((castPtr aptr :: Ptr Word8) `advancePtr` negate alg)
118
119isAlignedBy :: Storable a => Ptr a -> Int -> Bool
120isAlignedBy ptr alg = alignPtr ptr alg == ptr
121
122type Mem a = (Ptr a, Int)
123
124aligned :: Storable a => Mem Word8 -> (Mem Word8, Mem a, Mem Word8)
125aligned (ptr, len) =
126 let lowPtr = ptr
127 lowLen = midPtr `minusPtr` ptr
128 midOff = lowLen
129 (midPtr, alg) = align (castPtr ptr)
130 midLen = alg * div (len - midOff) alg
131 midLenA = midLen `div` alg
132 hghOff = midOff + midLen
133 hghPtr = ptr `advancePtr` hghOff
134 hghLen = len - hghOff
135 in
136 ((lowPtr, lowLen), (midPtr, midLenA), (hghPtr, hghLen))
137 where
138{-# INLINE aligned #-}
139
140type Mem3 a = (Ptr a, Ptr a, Ptr a, Int)
141
142emptyMem3 :: Mem3 a
143emptyMem3 = (nullPtr, nullPtr, nullPtr, 0)
144
145-- assume resulting memory is aligned
146alignedZip :: Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> Int
147 -> (Mem3 Word, Mem3 Word8)
148alignedZip aptr bptr cptr size =
149 let alg = alignment (undefined :: Word) in
150 if (aptr `isAlignedBy` alg) && (bptr `isAlignedBy` alg)
151 then
152 let asize = alignLow (nullPtr `plusPtr` size) `minusPtr` nullPtr
153 in
154 ( (castPtr aptr, castPtr bptr, castPtr cptr, asize `div` alg)
155 , ( aptr `advancePtr` asize
156 , bptr `advancePtr` asize
157 , cptr `advancePtr` asize
158 , (size - asize)
159 )
160 )
161 else (emptyMem3, (aptr, bptr, cptr, size))
162
163-- force specialization
164zipWithBS :: (Word -> Word -> Word)
165 -> (Word8 -> Word8 -> Word8)
166 -> ByteString -> ByteString -> ByteString
167zipWithBS f g a b =
168 let (afptr, aoff, asize) = B.toForeignPtr a
169 (bfptr, boff, bsize) = B.toForeignPtr b
170 size = min asize bsize in
171 B.unsafeCreate size $ \rptr -> do
172 withForeignPtr afptr $ \_aptr -> do
173 withForeignPtr bfptr $ \_bptr -> do
174 let aptr = _aptr `advancePtr` aoff
175 let bptr = _bptr `advancePtr` boff
176
177 let (mid, hgh) = alignedZip aptr bptr rptr size
178 zipWords mid
179 zipBytes hgh
180 where
181 zipBytes :: (Ptr Word8, Ptr Word8, Ptr Word8, Int) -> IO ()
182 zipBytes (aptr, bptr, rptr, n) = go 0
183 where
184 go :: Int -> IO ()
185 go i | i < n = do -- TODO unfold
186 av <- peekElemOff aptr i
187 bv <- peekElemOff bptr i
188 pokeElemOff rptr i (g av bv)
189 go (succ i)
190 | otherwise = return ()
191
192 zipWords :: (Ptr Word, Ptr Word, Ptr Word, Int) -> IO ()
193 zipWords (aptr, bptr, rptr, n) = go 0
194 where
195 go :: Int -> IO ()
196 go i | i < n = do -- TODO unfold
197 av <- peekElemOff aptr i
198 bv <- peekElemOff bptr i
199 pokeElemOff rptr i (f av bv)
200 go (succ i)
201 | otherwise = return ()
202
203
204
205zipWithBF :: (forall a. Bits a => a -> a -> a) -> Bitfield -> Bitfield -> Bitfield
206zipWithBF f a b = MkBitfield $ zipWithBS f f (bfBits a) (bfBits b)
207{-# INLINE zipWithBF #-}
208
209findSet :: ByteString -> Maybe Int
210findSet b =
211 let (fptr, off, len) = B.toForeignPtr b in
212 B.inlinePerformIO $ withForeignPtr fptr $ \_ptr -> do
213 let ptr = _ptr `advancePtr` off
214
215 let (low, mid, hgh) = aligned (ptr, len)
216 let lowOff = fst low `minusPtr` ptr
217 let midOff = fst mid `minusPtr` ptr
218 let hghOff = fst hgh `minusPtr` ptr
219
220 let resL = (lowOff +) <$> goFind low
221 let resM = (midOff +) <$> goFind (mid :: Mem Word) -- tune size here
222 -- TODO: with Word8
223 -- bytestring findIndex works 2
224 -- times faster.
225 let resH = (hghOff +) <$> goFind hgh
226
227 let res = resL <|> resM <|> resH
228
229 -- computation of res should not escape withForeignPtr
230 case res of
231 Nothing -> return ()
232 Just _ -> return ()
233
234 return res
235 107
108-- |
109--
110-- > forall bf. 0 <= completeness bf <= 1
111--
112completeness :: Bitfield -> Ratio PieceCount
113completeness b = haveCount b % totalCount b
114
115findMin :: Bitfield -> Maybe PieceIx
116findMin Bitfield {..}
117 | S.null bfSet = Nothing
118 | otherwise = Just (S.findMin bfSet)
119
120findMax :: Bitfield -> Maybe PieceIx
121findMax Bitfield {..}
122 | S.null bfSet = Nothing
123 | otherwise = Just (S.findMax bfSet)
124
125type Frequency = Int
126
127frequencies :: [Bitfield] -> Vector Frequency
128frequencies [] = V.fromList []
129frequencies xs = runST $ do
130 v <- VM.new size
131 VM.set v 0
132 forM_ xs $ \ Bitfield {..} -> do
133 forM_ (S.toList bfSet) $ \ x -> do
134 fr <- VM.read v x
135 VM.write v x (succ fr)
136 V.unsafeFreeze v
236 where 137 where
237 goFind :: (Storable a, Eq a, Num a) => Mem a -> Maybe Int 138 size = maximum (map bfSize xs)
238 goFind (ptr, n) = go 0
239 where
240 go :: Int -> Maybe Int
241 go i | i < n =
242 let v = B.inlinePerformIO (peekElemOff ptr i) in
243 if v /= 0
244 then Just i
245 else go (succ i)
246 | otherwise = Nothing
247
248foldBS :: (Word8 -> Int -> Int) -> (Word -> Int -> Int) -> Int -> ByteString -> Int
249foldBS f g acc b =
250 let (fptr, off, len) = B.toForeignPtr b in
251 B.inlinePerformIO $ withForeignPtr fptr $ \_ptr -> do
252 let ptr = _ptr `advancePtr` off
253
254 let (low, mid, hgh) = aligned (ptr, len)
255 let resL = goFold low acc
256 let resM = goFoldW (mid :: Mem Word) resL
257 let resH = goFold hgh resM
258
259 -- computation of res should not escape withForeignPtr
260 case resH of
261 0 -> return ()
262 _ -> return ()
263
264 return resH
265 139
140rarest :: [Bitfield] -> Maybe PieceIx
141rarest xs
142 | V.null freqMap = Nothing
143 | otherwise = Just $ fst $ V.ifoldr minIx (0, freqMap V.! 0) freqMap
266 where 144 where
267 goFold :: Mem Word8 -> Int -> Int 145 freqMap = frequencies xs
268 goFold (ptr, n) = go 0 146
269 where 147 minIx :: PieceIx -> Frequency -> (PieceIx, Frequency) -> (PieceIx, Frequency)
270 go :: Int -> Int -> Int 148 minIx ix fr acc@(_, fra)
271 go i !a 149 | fr < fra && fr > 0 = (ix, fr)
272 | i < n = 150 | otherwise = acc
273 let v = B.inlinePerformIO (peekElemOff ptr i) 151
274 in go (succ i) (f v a) 152
275 | otherwise = a 153
276 154{-----------------------------------------------------------------------
277 goFoldW :: Mem Word -> Int -> Int 155 Combine
278 goFoldW (ptr, n) = go 0 156-----------------------------------------------------------------------}
279 where
280 go :: Int -> Int -> Int
281 go i !a
282 | i < n =
283 let v = B.inlinePerformIO (peekElemOff ptr i)
284 in go (succ i) (g v a)
285 | otherwise = a
286 157
287union :: Bitfield -> Bitfield -> Bitfield 158union :: Bitfield -> Bitfield -> Bitfield
288union = zipWithBF (.|.) 159union a b = Bitfield {
289{-# INLINE union #-} 160 bfSize = bfSize a `max` bfSize b
161 , bfSet = bfSet a `S.union` bfSet b
162 }
290 163
291intersection :: Bitfield -> Bitfield -> Bitfield 164intersection :: Bitfield -> Bitfield -> Bitfield
292intersection = zipWithBF (.&.) 165intersection a b = Bitfield {
293{-# INLINE intersection #-} 166 bfSize = bfSize a `min` bfSize b
167 , bfSet = bfSet a `S.intersection` bfSet b
168 }
294 169
295difference :: Bitfield -> Bitfield -> Bitfield 170difference :: Bitfield -> Bitfield -> Bitfield
296difference = zipWithBF diffWord8 171difference a b = Bitfield {
297 where 172 bfSize = bfSize a -- FIXME is it more reasonable?
298 diffWord8 :: Bits a => a -> a -> a 173 , bfSet = bfSet a `S.difference` bfSet b
299 diffWord8 a b = a .&. (a `xor` b) 174 }
300 {-# INLINE diffWord8 #-}
301{-# INLINE difference #-}
302 175
303combine :: [Bitfield] -> Maybe Bitfield 176unions :: [Bitfield] -> Bitfield
304combine [] = Nothing 177unions = foldl' union (empty 0)
305combine as = return $ foldr1 intersection as
306 178
307haveCount :: Bitfield -> Int 179{-----------------------------------------------------------------------
308haveCount (MkBitfield b) = foldBS f f 0 b 180 Serialization
309 where 181-----------------------------------------------------------------------}
310 f byte count = popCount byte + count
311 182
312completeness :: Bitfield -> (Int, Int) 183getBitfield :: Int -> Get Bitfield
313completeness bf = (haveCount bf, bitfieldBitCount bf) 184getBitfield = error "getBitfield"
314 185
315-- | Get min index of piece that the peer have. 186putBitfield :: Bitfield -> Put
316findMin :: Bitfield -> Maybe Int 187putBitfield = error "putBitfield"
317findMin (MkBitfield b) = do 188
318 byteIx <- findSet b 189bitfieldByteCount :: Bitfield -> Int
319 bitIx <- findMinWord8 (B.index b byteIx) 190bitfieldByteCount = error "bitfieldByteCount"
320 return $ byteIx * bitSize (undefined :: Word8) + bitIx
321 where
322 -- TODO: bit tricks
323 findMinWord8 :: Word8 -> Maybe Int
324 findMinWord8 byte = L.find (testBit byte) [0..bitSize (undefined :: Word8) - 1]
325 {-# INLINE findMinWord8 #-}
326{-# INLINE findMin #-}
327
328
329findMax :: Bitfield -> Maybe Int
330findMax (MkBitfield b) = do
331 -- TODO avoid reverse
332 byteIx <- (pred (B.length b) -) <$> findSet (B.reverse b)
333 bitIx <- findMaxWord8 (B.index b byteIx)
334 return $ byteIx * bitSize (undefined :: Word8) + bitIx
335 where
336 -- TODO: bit tricks
337 findMaxWord8 :: Word8 -> Maybe Int
338 findMaxWord8 byte = L.find (testBit byte)
339 (reverse [0 :: Int ..
340 bitSize (undefined :: Word8) - 1])
341 191
342{-# INLINE findMax #-} 192{-----------------------------------------------------------------------
193 Debug
194-----------------------------------------------------------------------}
343 195
344frequencies :: [Bitfield] -> [Int] 196mkBitfield :: PieceCount -> [PieceIx] -> Bitfield
345frequencies xs = foldr1 (zipWith (+)) $ map (map fromEnum . toList) xs 197mkBitfield s ixs = Bitfield {
198 bfSize = s
199 , bfSet = S.splitLT s $ S.fromList ixs
200 } \ No newline at end of file