summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam Truzjan <pxqr.sta@gmail.com>2014-01-02 16:21:49 +0400
committerSam Truzjan <pxqr.sta@gmail.com>2014-01-02 16:21:49 +0400
commit2b1ce778669dc58e8b2bf60a942722ede2c0515b (patch)
tree6a997901bb517422c0affde53a2d65e7df11aaa5
parent61650c4513d173967fa3a4cc73a76244319340cf (diff)
Add documentation to Routing
-rw-r--r--src/Network/BitTorrent/DHT/Routing.hs179
1 files changed, 107 insertions, 72 deletions
diff --git a/src/Network/BitTorrent/DHT/Routing.hs b/src/Network/BitTorrent/DHT/Routing.hs
index 43792b0b..4897a22e 100644
--- a/src/Network/BitTorrent/DHT/Routing.hs
+++ b/src/Network/BitTorrent/DHT/Routing.hs
@@ -5,36 +5,49 @@
5-- Stability : experimental 5-- Stability : experimental
6-- Portability : portable 6-- Portability : portable
7-- 7--
8-- Every node maintains a routing table of known good nodes. The
9-- nodes in the routing table are used as starting points for
10-- queries in the DHT. Nodes from the routing table are returned in
11-- response to queries from other nodes.
12--
13-- For more info see:
14-- <http://www.bittorrent.org/beps/bep_0005.html#routing-table>
15--
8{-# LANGUAGE RecordWildCards #-} 16{-# LANGUAGE RecordWildCards #-}
9{-# LANGUAGE TypeOperators #-} 17{-# LANGUAGE TypeOperators #-}
10{-# LANGUAGE DeriveGeneric #-} 18{-# LANGUAGE DeriveGeneric #-}
11{-# OPTIONS_GHC -fno-warn-orphans #-} 19{-# OPTIONS_GHC -fno-warn-orphans #-}
12module Network.BitTorrent.DHT.Routing 20module Network.BitTorrent.DHT.Routing
13 ( -- * Routing table 21 ( -- * Table
14 Table 22 Table
15 23
16 -- * Table attributes 24 -- * Attributes
17 , BucketCount 25 , BucketCount
26 , defaultBucketCount
18 , BucketSize 27 , BucketSize
28 , defaultBucketSize
19 , NodeCount 29 , NodeCount
20 30
21 -- * Routing
22 , Timestamp
23 , Routing
24 , runRouting
25
26 -- * Query 31 -- * Query
27 , thisId 32 , thisId
28 , shape 33 , shape
29 , Network.BitTorrent.DHT.Routing.size 34 , Network.BitTorrent.DHT.Routing.size
30 , Network.BitTorrent.DHT.Routing.depth 35 , Network.BitTorrent.DHT.Routing.depth
36
37 -- * Lookup
31 , K 38 , K
39 , defaultK
32 , Network.BitTorrent.DHT.Routing.kclosest 40 , Network.BitTorrent.DHT.Routing.kclosest
33 , Network.BitTorrent.DHT.Routing.kclosestHash 41 , Network.BitTorrent.DHT.Routing.kclosestHash
34 42
35 -- * Construction 43 -- * Construction
36 , Network.BitTorrent.DHT.Routing.nullTable 44 , Network.BitTorrent.DHT.Routing.nullTable
37 , Network.BitTorrent.DHT.Routing.insert 45 , Network.BitTorrent.DHT.Routing.insert
46
47 -- * Routing
48 , Timestamp
49 , Routing
50 , runRouting
38 ) where 51 ) where
39 52
40import Control.Applicative as A 53import Control.Applicative as A
@@ -56,50 +69,28 @@ import Text.PrettyPrint.Class
56import Data.Torrent.InfoHash 69import Data.Torrent.InfoHash
57import Network.BitTorrent.Core 70import Network.BitTorrent.Core
58 71
59{-
60-- | Routing tree should contain key -> value pairs in this way:
61--
62-- * More keys that near to our node key, and less keys that far
63-- from our node key.
64--
65-- * Tree might be saturated. If this happen we can only update
66-- buckets, but we can't add new buckets.
67--
68-- Instead of using ordinary binary tree and keep track is it
69-- following restrictions above (that's somewhat non-trivial) we
70-- store distance -> value keys. This lead to simple data structure
71-- that actually isomorphic to non-empty list. So we first map our
72-- keys to distances using our node ID and store them in tree. When
73-- we need to extract a pair we map distances to keys back, again
74-- using our node ID. This normalization happen in routing table.
75--
76data Tree k v
77 = Tip (Bucket k v)
78 | Bin (Tree k v) (Bucket k v)
79
80empty :: Int -> Tree k v
81empty = Tip . Bucket.empty
82
83insert :: Applicative f => Bits k
84 => (v -> f Bool) -> (k, v) -> Tree k v -> f (Tree k v)
85insert ping (k, v) = go 0
86 where
87 go n (Tip bucket)
88 | isFull bucket, (near, far) <- split n bucket
89 = pure (Tip near `Bin` far)
90 | otherwise = Tip <$> Bucket.insert ping (k, v) bucket
91
92 go n (Bin near far)
93 | k `testBit` n = Bin <$> pure near <*> Bucket.insert ping (k, v) far
94 | otherwise = Bin <$> go (succ n) near <*> pure far
95-}
96
97{----------------------------------------------------------------------- 72{-----------------------------------------------------------------------
98-- Routing monad 73-- Routing monad
99-----------------------------------------------------------------------} 74-----------------------------------------------------------------------}
100 75
76-- | Last time the node was responding to our queries.
77--
78-- Not all nodes that we learn about are equal. Some are \"good\" and
79-- some are not. Many nodes using the DHT are able to send queries
80-- and receive responses, but are not able to respond to queries
81-- from other nodes. It is important that each node's routing table
82-- must contain only known good nodes. A good node is a node has
83-- responded to one of our queries within the last 15 minutes. A
84-- node is also good if it has ever responded to one of our queries
85-- and has sent us a query within the last 15 minutes. After 15
86-- minutes of inactivity, a node becomes questionable. Nodes become
87-- bad when they fail to respond to multiple queries in a row. Nodes
88-- that we know are good are given priority over nodes with unknown
89-- status.
90--
101type Timestamp = POSIXTime 91type Timestamp = POSIXTime
102 92
93-- | Some routing operations might need to perform additional IO.
103data Routing ip result 94data Routing ip result
104 = Full 95 = Full
105 | Done result 96 | Done result
@@ -136,12 +127,13 @@ instance Alternative (Routing ip) where
136 NeedPing a f <|> m = NeedPing a $ \ p -> f p <|> m 127 NeedPing a f <|> m = NeedPing a $ \ p -> f p <|> m
137 Refresh n f <|> m = Refresh n $ \ i -> f i <|> m 128 Refresh n f <|> m = Refresh n $ \ i -> f i <|> m
138 129
130-- | Run routing table operation.
139runRouting :: (Monad m, Eq ip) 131runRouting :: (Monad m, Eq ip)
140 => (NodeAddr ip -> m Bool) -- ^ ping_node 132 => (NodeAddr ip -> m Bool) -- ^ ping the specific node;
141 -> (NodeId -> m [NodeInfo ip]) -- ^ find_nodes 133 -> (NodeId -> m [NodeInfo ip]) -- ^ get closest nodes;
142 -> m Timestamp -- ^ timestamper 134 -> m Timestamp -- ^ get current time;
143 -> Routing ip f -- ^ action 135 -> Routing ip f -- ^ operation to run;
144 -> m (Maybe f) -- ^ result 136 -> m (Maybe f) -- ^ operation result;
145runRouting ping_node find_nodes timestamper = go 137runRouting ping_node find_nodes timestamper = go
146 where 138 where
147 go Full = return (Nothing) 139 go Full = return (Nothing)
@@ -160,16 +152,27 @@ runRouting ping_node find_nodes timestamper = go
160 152
161getTime :: Routing ip Timestamp 153getTime :: Routing ip Timestamp
162getTime = GetTime return 154getTime = GetTime return
155{-# INLINE getTime #-}
163 156
164needPing :: NodeAddr ip -> Routing ip Bool 157needPing :: NodeAddr ip -> Routing ip Bool
165needPing addr = NeedPing addr return 158needPing addr = NeedPing addr return
159{-# INLINE needPing #-}
166 160
167refresh :: NodeId -> Routing ip [NodeInfo ip] 161refresh :: NodeId -> Routing ip [NodeInfo ip]
168refresh nid = Refresh nid return 162refresh nid = Refresh nid return
163{-# INLINE refresh #-}
169 164
170{----------------------------------------------------------------------- 165{-----------------------------------------------------------------------
171 Bucket 166 Bucket
172-----------------------------------------------------------------------} 167-----------------------------------------------------------------------}
168-- TODO: add replacement cache to the bucket
169--
170-- When a k-bucket is full and a new node is discovered for that
171-- k-bucket, the least recently seen node in the k-bucket is
172-- PINGed. If the node is found to be still alive, the new node is
173-- place in a secondary list, a replacement cache. The replacement
174-- cache is used only if a node in the k-bucket stops responding. In
175-- other words: new nodes are used only when older nodes disappear.
173 176
174-- | Timestamp - last time this node is pinged. 177-- | Timestamp - last time this node is pinged.
175type NodeEntry ip = Binding (NodeInfo ip) Timestamp 178type NodeEntry ip = Binding (NodeInfo ip) Timestamp
@@ -180,9 +183,13 @@ instance (Serialize k, Serialize v) => Serialize (Binding k v) where
180 183
181-- TODO instance Pretty where 184-- TODO instance Pretty where
182 185
183-- | Most clients use this value for maximum bucket size. 186-- | Number of nodes in a bucket.
187type BucketSize = Int
188
189-- | Maximum number of 'NodeInfo's stored in a bucket. Most clients
190-- use this value.
184defaultBucketSize :: BucketSize 191defaultBucketSize :: BucketSize
185defaultBucketSize = 20 192defaultBucketSize = 8
186 193
187-- | Bucket is also limited in its length — thus it's called k-bucket. 194-- | Bucket is also limited in its length — thus it's called k-bucket.
188-- When bucket becomes full we should split it in two lists by 195-- When bucket becomes full we should split it in two lists by
@@ -213,12 +220,7 @@ leastRecently = minView
213delta :: NominalDiffTime 220delta :: NominalDiffTime
214delta = 15 * 60 221delta = 15 * 60
215 222
216-- | Max bucket size, in nodes. 223-- | Should maintain a set of stable long running nodes.
217type Alpha = Int
218
219defaultAlpha :: Alpha
220defaultAlpha = 8
221
222insertBucket :: Eq ip => Timestamp -> NodeInfo ip -> Bucket ip 224insertBucket :: Eq ip => Timestamp -> NodeInfo ip -> Bucket ip
223 -> ip `Routing` Bucket ip 225 -> ip `Routing` Bucket ip
224insertBucket curTime info bucket 226insertBucket curTime info bucket
@@ -226,7 +228,7 @@ insertBucket curTime info bucket
226 | Just _ <- PSQ.lookup info bucket = do 228 | Just _ <- PSQ.lookup info bucket = do
227 return $ PSQ.insertWith max info curTime bucket 229 return $ PSQ.insertWith max info curTime bucket
228 230
229 -- update the all bucket if it is too outdated 231 -- Buckets that have not been changed in 15 minutes should be "refreshed."
230 | Just (NodeInfo {..} :-> lastSeen) <- lastChanged bucket 232 | Just (NodeInfo {..} :-> lastSeen) <- lastChanged bucket
231 , curTime - lastSeen > delta = do 233 , curTime - lastSeen > delta = do
232 infos <- refresh nodeId 234 infos <- refresh nodeId
@@ -234,8 +236,11 @@ insertBucket curTime info bucket
234 let newBucket = L.foldr (\ x -> PSQ.insertWith max x refTime) bucket infos 236 let newBucket = L.foldr (\ x -> PSQ.insertWith max x refTime) bucket infos
235 insertBucket refTime info newBucket 237 insertBucket refTime info newBucket
236 238
237 -- update questionable nodes, if any; then try to insert our new node 239 -- If there are any questionable nodes in the bucket have not been
238 -- this case can remove bad nodes from bucket, so we can insert a new one 240 -- seen in the last 15 minutes, the least recently seen node is
241 -- pinged. If any nodes in the bucket are known to have become bad,
242 -- then one is replaced by the new node in the next insertBucket
243 -- iteration.
239 | Just ((old @ NodeInfo {..} :-> leastSeen), rest) <- leastRecently bucket 244 | Just ((old @ NodeInfo {..} :-> leastSeen), rest) <- leastRecently bucket
240 , curTime - leastSeen > delta = do 245 , curTime - leastSeen > delta = do
241 pong <- needPing nodeAddr 246 pong <- needPing nodeAddr
@@ -244,10 +249,10 @@ insertBucket curTime info bucket
244 insertBucket pongTime info newBucket 249 insertBucket pongTime info newBucket
245 250
246 -- bucket is good, but not full => we can insert a new node 251 -- bucket is good, but not full => we can insert a new node
247 | PSQ.size bucket < defaultAlpha = do 252 | PSQ.size bucket < defaultBucketSize = do
248 return $ PSQ.insert info curTime bucket 253 return $ PSQ.insert info curTime bucket
249 254
250 -- bucket is full of good nodes => ignore new node 255 -- When the bucket is full of good nodes, the new node is simply discarded.
251 | otherwise = A.empty 256 | otherwise = A.empty
252 257
253insertNode :: Eq ip => NodeInfo ip -> Bucket ip -> ip `Routing` Bucket ip 258insertNode :: Eq ip => NodeInfo ip -> Bucket ip -> ip `Routing` Bucket ip
@@ -266,12 +271,36 @@ split i = (PSQ.fromList *** PSQ.fromList) . partition spanBit . PSQ.toList
266-- Table 271-- Table
267-----------------------------------------------------------------------} 272-----------------------------------------------------------------------}
268 273
274-- | Number of buckets in a routing table.
275type BucketCount = Int
276
269defaultBucketCount :: BucketCount 277defaultBucketCount :: BucketCount
270defaultBucketCount = 20 278defaultBucketCount = 20
271 279
280-- | The routing table covers the entire 'NodeId' space from 0 to 2 ^
281-- 160. The routing table is subdivided into 'Bucket's that each cover
282-- a portion of the space. An empty table has one bucket with an ID
283-- space range of @min = 0, max = 2 ^ 160@. When a node with ID \"N\"
284-- is inserted into the table, it is placed within the bucket that has
285-- @min <= N < max@. An empty table has only one bucket so any node
286-- must fit within it. Each bucket can only hold 'K' nodes, currently
287-- eight, before becoming 'Full'. When a bucket is full of known good
288-- nodes, no more nodes may be added unless our own 'NodeId' falls
289-- within the range of the 'Bucket'. In that case, the bucket is
290-- replaced by two new buckets each with half the range of the old
291-- bucket and the nodes from the old bucket are distributed among the
292-- two new ones. For a new table with only one bucket, the full bucket
293-- is always split into two new buckets covering the ranges @0..2 ^
294-- 159@ and @2 ^ 159..2 ^ 160@.
295--
272data Table ip 296data Table ip
297 -- most nearest bucket
273 = Tip NodeId BucketCount (Bucket ip) 298 = Tip NodeId BucketCount (Bucket ip)
299
300 -- left biased tree branch
274 | Zero (Table ip) (Bucket ip) 301 | Zero (Table ip) (Bucket ip)
302
303 -- right biased tree branch
275 | One (Bucket ip) (Table ip) 304 | One (Bucket ip) (Table ip)
276 deriving Generic 305 deriving Generic
277 306
@@ -305,8 +334,7 @@ thisId (Tip nid _ _) = nid
305thisId (Zero table _) = thisId table 334thisId (Zero table _) = thisId table
306thisId (One _ table) = thisId table 335thisId (One _ table) = thisId table
307 336
308type BucketSize = Int 337-- | Number of nodes in a bucket or a table.
309type BucketCount = Int
310type NodeCount = Int 338type NodeCount = Int
311 339
312-- | Internally, routing table is similar to list of buckets or a 340-- | Internally, routing table is similar to list of buckets or a
@@ -335,21 +363,28 @@ lookupBucket nid = go 0
335 | otherwise = pure bucket 363 | otherwise = pure bucket
336 go _ (Tip _ _ bucket) = pure bucket 364 go _ (Tip _ _ bucket) = pure bucket
337 365
366-- | Count of closest nodes in find_node request.
338type K = Int 367type K = Int
339 368
340-- | Used in 'find_node' queries. 369-- | Default 'K' is equal to 'defaultBucketSize'.
370defaultK :: K
371defaultK = 8
372
373-- | Get a list of /K/ closest nodes using XOR metric. Used in
374-- 'find_node' queries.
341kclosest :: Eq ip => K -> NodeId -> Table ip -> [NodeInfo ip] 375kclosest :: Eq ip => K -> NodeId -> Table ip -> [NodeInfo ip]
342kclosest k nid = L.map key . PSQ.toList . fromMaybe PSQ.empty 376kclosest k nid = L.map key . PSQ.toList . fromMaybe PSQ.empty
343 . lookupBucket nid 377 . lookupBucket nid
344 378
345coerseId :: (Serialize a, Serialize b) => a -> b 379coerceId :: (Serialize a, Serialize b) => a -> b
346coerseId = either (error msg) id . S.decode . S.encode 380coerceId = either (error msg) id . S.decode . S.encode
347 where 381 where
348 msg = "coerseId: impossible" 382 msg = "coerceId: impossible"
349 383
350-- | Used in 'get_peers' queries. 384-- | Get a list of /K/ nodes with node id closest to the specific
351kclosestHash :: Eq a => Alpha -> InfoHash -> Table a -> [NodeInfo a] 385-- infohash. Used in 'get_peers' queries.
352kclosestHash k nid t = kclosest k (coerseId nid) t 386kclosestHash :: Eq a => K -> InfoHash -> Table a -> [NodeInfo a]
387kclosestHash k nid t = kclosest k (coerceId nid) t
353 388
354{----------------------------------------------------------------------- 389{-----------------------------------------------------------------------
355-- Routing 390-- Routing