diff options
author | Sam Truzjan <pxqr.sta@gmail.com> | 2014-01-02 16:21:49 +0400 |
---|---|---|
committer | Sam Truzjan <pxqr.sta@gmail.com> | 2014-01-02 16:21:49 +0400 |
commit | 2b1ce778669dc58e8b2bf60a942722ede2c0515b (patch) | |
tree | 6a997901bb517422c0affde53a2d65e7df11aaa5 /src | |
parent | 61650c4513d173967fa3a4cc73a76244319340cf (diff) |
Add documentation to Routing
Diffstat (limited to 'src')
-rw-r--r-- | src/Network/BitTorrent/DHT/Routing.hs | 179 |
1 file changed, 107 insertions, 72 deletions
diff --git a/src/Network/BitTorrent/DHT/Routing.hs b/src/Network/BitTorrent/DHT/Routing.hs index 43792b0b..4897a22e 100644 --- a/src/Network/BitTorrent/DHT/Routing.hs +++ b/src/Network/BitTorrent/DHT/Routing.hs | |||
@@ -5,36 +5,49 @@ | |||
5 | -- Stability : experimental | 5 | -- Stability : experimental |
6 | -- Portability : portable | 6 | -- Portability : portable |
7 | -- | 7 | -- |
8 | -- Every node maintains a routing table of known good nodes. The | ||
9 | -- nodes in the routing table are used as starting points for | ||
10 | -- queries in the DHT. Nodes from the routing table are returned in | ||
11 | -- response to queries from other nodes. | ||
12 | -- | ||
13 | -- For more info see: | ||
14 | -- <http://www.bittorrent.org/beps/bep_0005.html#routing-table> | ||
15 | -- | ||
8 | {-# LANGUAGE RecordWildCards #-} | 16 | {-# LANGUAGE RecordWildCards #-} |
9 | {-# LANGUAGE TypeOperators #-} | 17 | {-# LANGUAGE TypeOperators #-} |
10 | {-# LANGUAGE DeriveGeneric #-} | 18 | {-# LANGUAGE DeriveGeneric #-} |
11 | {-# OPTIONS_GHC -fno-warn-orphans #-} | 19 | {-# OPTIONS_GHC -fno-warn-orphans #-} |
12 | module Network.BitTorrent.DHT.Routing | 20 | module Network.BitTorrent.DHT.Routing |
13 | ( -- * Routing table | 21 | ( -- * Table |
14 | Table | 22 | Table |
15 | 23 | ||
16 | -- * Table attributes | 24 | -- * Attributes |
17 | , BucketCount | 25 | , BucketCount |
26 | , defaultBucketCount | ||
18 | , BucketSize | 27 | , BucketSize |
28 | , defaultBucketSize | ||
19 | , NodeCount | 29 | , NodeCount |
20 | 30 | ||
21 | -- * Routing | ||
22 | , Timestamp | ||
23 | , Routing | ||
24 | , runRouting | ||
25 | |||
26 | -- * Query | 31 | -- * Query |
27 | , thisId | 32 | , thisId |
28 | , shape | 33 | , shape |
29 | , Network.BitTorrent.DHT.Routing.size | 34 | , Network.BitTorrent.DHT.Routing.size |
30 | , Network.BitTorrent.DHT.Routing.depth | 35 | , Network.BitTorrent.DHT.Routing.depth |
36 | |||
37 | -- * Lookup | ||
31 | , K | 38 | , K |
39 | , defaultK | ||
32 | , Network.BitTorrent.DHT.Routing.kclosest | 40 | , Network.BitTorrent.DHT.Routing.kclosest |
33 | , Network.BitTorrent.DHT.Routing.kclosestHash | 41 | , Network.BitTorrent.DHT.Routing.kclosestHash |
34 | 42 | ||
35 | -- * Construction | 43 | -- * Construction |
36 | , Network.BitTorrent.DHT.Routing.nullTable | 44 | , Network.BitTorrent.DHT.Routing.nullTable |
37 | , Network.BitTorrent.DHT.Routing.insert | 45 | , Network.BitTorrent.DHT.Routing.insert |
46 | |||
47 | -- * Routing | ||
48 | , Timestamp | ||
49 | , Routing | ||
50 | , runRouting | ||
38 | ) where | 51 | ) where |
39 | 52 | ||
40 | import Control.Applicative as A | 53 | import Control.Applicative as A |
@@ -56,50 +69,28 @@ import Text.PrettyPrint.Class | |||
56 | import Data.Torrent.InfoHash | 69 | import Data.Torrent.InfoHash |
57 | import Network.BitTorrent.Core | 70 | import Network.BitTorrent.Core |
58 | 71 | ||
59 | {- | ||
60 | -- | Routing tree should contain key -> value pairs in this way: | ||
61 | -- | ||
62 | -- * More keys that near to our node key, and less keys that far | ||
63 | -- from our node key. | ||
64 | -- | ||
65 | -- * Tree might be saturated. If this happen we can only update | ||
66 | -- buckets, but we can't add new buckets. | ||
67 | -- | ||
68 | -- Instead of using ordinary binary tree and keep track is it | ||
69 | -- following restrictions above (that's somewhat non-trivial) we | ||
70 | -- store distance -> value keys. This lead to simple data structure | ||
71 | -- that actually isomorphic to non-empty list. So we first map our | ||
72 | -- keys to distances using our node ID and store them in tree. When | ||
73 | -- we need to extract a pair we map distances to keys back, again | ||
74 | -- using our node ID. This normalization happen in routing table. | ||
75 | -- | ||
76 | data Tree k v | ||
77 | = Tip (Bucket k v) | ||
78 | | Bin (Tree k v) (Bucket k v) | ||
79 | |||
80 | empty :: Int -> Tree k v | ||
81 | empty = Tip . Bucket.empty | ||
82 | |||
83 | insert :: Applicative f => Bits k | ||
84 | => (v -> f Bool) -> (k, v) -> Tree k v -> f (Tree k v) | ||
85 | insert ping (k, v) = go 0 | ||
86 | where | ||
87 | go n (Tip bucket) | ||
88 | | isFull bucket, (near, far) <- split n bucket | ||
89 | = pure (Tip near `Bin` far) | ||
90 | | otherwise = Tip <$> Bucket.insert ping (k, v) bucket | ||
91 | |||
92 | go n (Bin near far) | ||
93 | | k `testBit` n = Bin <$> pure near <*> Bucket.insert ping (k, v) far | ||
94 | | otherwise = Bin <$> go (succ n) near <*> pure far | ||
95 | -} | ||
96 | |||
97 | {----------------------------------------------------------------------- | 72 | {----------------------------------------------------------------------- |
98 | -- Routing monad | 73 | -- Routing monad |
99 | -----------------------------------------------------------------------} | 74 | -----------------------------------------------------------------------} |
100 | 75 | ||
76 | -- | Last time the node was responding to our queries. | ||
77 | -- | ||
78 | -- Not all nodes that we learn about are equal. Some are \"good\" and | ||
79 | -- some are not. Many nodes using the DHT are able to send queries | ||
80 | -- and receive responses, but are not able to respond to queries | ||
81 | -- from other nodes. It is important that each node's routing table | ||
82 | -- must contain only known good nodes. A good node is a node that has | ||
83 | -- responded to one of our queries within the last 15 minutes. A | ||
84 | -- node is also good if it has ever responded to one of our queries | ||
85 | -- and has sent us a query within the last 15 minutes. After 15 | ||
86 | -- minutes of inactivity, a node becomes questionable. Nodes become | ||
87 | -- bad when they fail to respond to multiple queries in a row. Nodes | ||
88 | -- that we know are good are given priority over nodes with unknown | ||
89 | -- status. | ||
90 | -- | ||
101 | type Timestamp = POSIXTime | 91 | type Timestamp = POSIXTime |
102 | 92 | ||
93 | -- | Some routing operations might need to perform additional IO. | ||
103 | data Routing ip result | 94 | data Routing ip result |
104 | = Full | 95 | = Full |
105 | | Done result | 96 | | Done result |
@@ -136,12 +127,13 @@ instance Alternative (Routing ip) where | |||
136 | NeedPing a f <|> m = NeedPing a $ \ p -> f p <|> m | 127 | NeedPing a f <|> m = NeedPing a $ \ p -> f p <|> m |
137 | Refresh n f <|> m = Refresh n $ \ i -> f i <|> m | 128 | Refresh n f <|> m = Refresh n $ \ i -> f i <|> m |
138 | 129 | ||
130 | -- | Run routing table operation. | ||
139 | runRouting :: (Monad m, Eq ip) | 131 | runRouting :: (Monad m, Eq ip) |
140 | => (NodeAddr ip -> m Bool) -- ^ ping_node | 132 | => (NodeAddr ip -> m Bool) -- ^ ping the specific node; |
141 | -> (NodeId -> m [NodeInfo ip]) -- ^ find_nodes | 133 | -> (NodeId -> m [NodeInfo ip]) -- ^ get closest nodes; |
142 | -> m Timestamp -- ^ timestamper | 134 | -> m Timestamp -- ^ get current time; |
143 | -> Routing ip f -- ^ action | 135 | -> Routing ip f -- ^ operation to run; |
144 | -> m (Maybe f) -- ^ result | 136 | -> m (Maybe f) -- ^ operation result; |
145 | runRouting ping_node find_nodes timestamper = go | 137 | runRouting ping_node find_nodes timestamper = go |
146 | where | 138 | where |
147 | go Full = return (Nothing) | 139 | go Full = return (Nothing) |
@@ -160,16 +152,27 @@ runRouting ping_node find_nodes timestamper = go | |||
160 | 152 | ||
161 | getTime :: Routing ip Timestamp | 153 | getTime :: Routing ip Timestamp |
162 | getTime = GetTime return | 154 | getTime = GetTime return |
155 | {-# INLINE getTime #-} | ||
163 | 156 | ||
164 | needPing :: NodeAddr ip -> Routing ip Bool | 157 | needPing :: NodeAddr ip -> Routing ip Bool |
165 | needPing addr = NeedPing addr return | 158 | needPing addr = NeedPing addr return |
159 | {-# INLINE needPing #-} | ||
166 | 160 | ||
167 | refresh :: NodeId -> Routing ip [NodeInfo ip] | 161 | refresh :: NodeId -> Routing ip [NodeInfo ip] |
168 | refresh nid = Refresh nid return | 162 | refresh nid = Refresh nid return |
163 | {-# INLINE refresh #-} | ||
169 | 164 | ||
170 | {----------------------------------------------------------------------- | 165 | {----------------------------------------------------------------------- |
171 | Bucket | 166 | Bucket |
172 | -----------------------------------------------------------------------} | 167 | -----------------------------------------------------------------------} |
168 | -- TODO: add replacement cache to the bucket | ||
169 | -- | ||
170 | -- When a k-bucket is full and a new node is discovered for that | ||
171 | -- k-bucket, the least recently seen node in the k-bucket is | ||
172 | -- PINGed. If the node is found to be still alive, the new node is | ||
173 | -- placed in a secondary list, a replacement cache. The replacement | ||
174 | -- cache is used only if a node in the k-bucket stops responding. In | ||
175 | -- other words: new nodes are used only when older nodes disappear. | ||
173 | 176 | ||
174 | -- | Timestamp - last time this node was pinged. | 177 | -- | Timestamp - last time this node was pinged. |
175 | type NodeEntry ip = Binding (NodeInfo ip) Timestamp | 178 | type NodeEntry ip = Binding (NodeInfo ip) Timestamp |
@@ -180,9 +183,13 @@ instance (Serialize k, Serialize v) => Serialize (Binding k v) where | |||
180 | 183 | ||
181 | -- TODO instance Pretty where | 184 | -- TODO instance Pretty where |
182 | 185 | ||
183 | -- | Most clients use this value for maximum bucket size. | 186 | -- | Number of nodes in a bucket. |
187 | type BucketSize = Int | ||
188 | |||
189 | -- | Maximum number of 'NodeInfo's stored in a bucket. Most clients | ||
190 | -- use this value. | ||
184 | defaultBucketSize :: BucketSize | 191 | defaultBucketSize :: BucketSize |
185 | defaultBucketSize = 20 | 192 | defaultBucketSize = 8 |
186 | 193 | ||
187 | -- | Bucket is also limited in its length — thus it's called k-bucket. | 194 | -- | Bucket is also limited in its length — thus it's called k-bucket. |
188 | -- When bucket becomes full we should split it in two lists by | 195 | -- When bucket becomes full we should split it in two lists by |
@@ -213,12 +220,7 @@ leastRecently = minView | |||
213 | delta :: NominalDiffTime | 220 | delta :: NominalDiffTime |
214 | delta = 15 * 60 | 221 | delta = 15 * 60 |
215 | 222 | ||
216 | -- | Max bucket size, in nodes. | 223 | -- | Should maintain a set of stable long running nodes. |
217 | type Alpha = Int | ||
218 | |||
219 | defaultAlpha :: Alpha | ||
220 | defaultAlpha = 8 | ||
221 | |||
222 | insertBucket :: Eq ip => Timestamp -> NodeInfo ip -> Bucket ip | 224 | insertBucket :: Eq ip => Timestamp -> NodeInfo ip -> Bucket ip |
223 | -> ip `Routing` Bucket ip | 225 | -> ip `Routing` Bucket ip |
224 | insertBucket curTime info bucket | 226 | insertBucket curTime info bucket |
@@ -226,7 +228,7 @@ insertBucket curTime info bucket | |||
226 | | Just _ <- PSQ.lookup info bucket = do | 228 | | Just _ <- PSQ.lookup info bucket = do |
227 | return $ PSQ.insertWith max info curTime bucket | 229 | return $ PSQ.insertWith max info curTime bucket |
228 | 230 | ||
229 | -- update the all bucket if it is too outdated | 231 | -- Buckets that have not been changed in 15 minutes should be "refreshed." |
230 | | Just (NodeInfo {..} :-> lastSeen) <- lastChanged bucket | 232 | | Just (NodeInfo {..} :-> lastSeen) <- lastChanged bucket |
231 | , curTime - lastSeen > delta = do | 233 | , curTime - lastSeen > delta = do |
232 | infos <- refresh nodeId | 234 | infos <- refresh nodeId |
@@ -234,8 +236,11 @@ insertBucket curTime info bucket | |||
234 | let newBucket = L.foldr (\ x -> PSQ.insertWith max x refTime) bucket infos | 236 | let newBucket = L.foldr (\ x -> PSQ.insertWith max x refTime) bucket infos |
235 | insertBucket refTime info newBucket | 237 | insertBucket refTime info newBucket |
236 | 238 | ||
237 | -- update questionable nodes, if any; then try to insert our new node | 239 | -- If any questionable nodes in the bucket have not been |
238 | -- this case can remove bad nodes from bucket, so we can insert a new one | 240 | -- seen in the last 15 minutes, the least recently seen node is |
241 | -- pinged. If any nodes in the bucket are known to have become bad, | ||
242 | -- then one is replaced by the new node in the next insertBucket | ||
243 | -- iteration. | ||
239 | | Just ((old @ NodeInfo {..} :-> leastSeen), rest) <- leastRecently bucket | 244 | | Just ((old @ NodeInfo {..} :-> leastSeen), rest) <- leastRecently bucket |
240 | , curTime - leastSeen > delta = do | 245 | , curTime - leastSeen > delta = do |
241 | pong <- needPing nodeAddr | 246 | pong <- needPing nodeAddr |
@@ -244,10 +249,10 @@ insertBucket curTime info bucket | |||
244 | insertBucket pongTime info newBucket | 249 | insertBucket pongTime info newBucket |
245 | 250 | ||
246 | -- bucket is good, but not full => we can insert a new node | 251 | -- bucket is good, but not full => we can insert a new node |
247 | | PSQ.size bucket < defaultAlpha = do | 252 | | PSQ.size bucket < defaultBucketSize = do |
248 | return $ PSQ.insert info curTime bucket | 253 | return $ PSQ.insert info curTime bucket |
249 | 254 | ||
250 | -- bucket is full of good nodes => ignore new node | 255 | -- When the bucket is full of good nodes, the new node is simply discarded. |
251 | | otherwise = A.empty | 256 | | otherwise = A.empty |
252 | 257 | ||
253 | insertNode :: Eq ip => NodeInfo ip -> Bucket ip -> ip `Routing` Bucket ip | 258 | insertNode :: Eq ip => NodeInfo ip -> Bucket ip -> ip `Routing` Bucket ip |
@@ -266,12 +271,36 @@ split i = (PSQ.fromList *** PSQ.fromList) . partition spanBit . PSQ.toList | |||
266 | -- Table | 271 | -- Table |
267 | -----------------------------------------------------------------------} | 272 | -----------------------------------------------------------------------} |
268 | 273 | ||
274 | -- | Number of buckets in a routing table. | ||
275 | type BucketCount = Int | ||
276 | |||
269 | defaultBucketCount :: BucketCount | 277 | defaultBucketCount :: BucketCount |
270 | defaultBucketCount = 20 | 278 | defaultBucketCount = 20 |
271 | 279 | ||
280 | -- | The routing table covers the entire 'NodeId' space from 0 to 2 ^ | ||
281 | -- 160. The routing table is subdivided into 'Bucket's that each cover | ||
282 | -- a portion of the space. An empty table has one bucket with an ID | ||
283 | -- space range of @min = 0, max = 2 ^ 160@. When a node with ID \"N\" | ||
284 | -- is inserted into the table, it is placed within the bucket that has | ||
285 | -- @min <= N < max@. An empty table has only one bucket so any node | ||
286 | -- must fit within it. Each bucket can only hold 'K' nodes, currently | ||
287 | -- eight, before becoming 'Full'. When a bucket is full of known good | ||
288 | -- nodes, no more nodes may be added unless our own 'NodeId' falls | ||
289 | -- within the range of the 'Bucket'. In that case, the bucket is | ||
290 | -- replaced by two new buckets each with half the range of the old | ||
291 | -- bucket and the nodes from the old bucket are distributed among the | ||
292 | -- two new ones. For a new table with only one bucket, the full bucket | ||
293 | -- is always split into two new buckets covering the ranges @0..2 ^ | ||
294 | -- 159@ and @2 ^ 159..2 ^ 160@. | ||
295 | -- | ||
272 | data Table ip | 296 | data Table ip |
297 | -- most nearest bucket | ||
273 | = Tip NodeId BucketCount (Bucket ip) | 298 | = Tip NodeId BucketCount (Bucket ip) |
299 | |||
300 | -- left biased tree branch | ||
274 | | Zero (Table ip) (Bucket ip) | 301 | | Zero (Table ip) (Bucket ip) |
302 | |||
303 | -- right biased tree branch | ||
275 | | One (Bucket ip) (Table ip) | 304 | | One (Bucket ip) (Table ip) |
276 | deriving Generic | 305 | deriving Generic |
277 | 306 | ||
@@ -305,8 +334,7 @@ thisId (Tip nid _ _) = nid | |||
305 | thisId (Zero table _) = thisId table | 334 | thisId (Zero table _) = thisId table |
306 | thisId (One _ table) = thisId table | 335 | thisId (One _ table) = thisId table |
307 | 336 | ||
308 | type BucketSize = Int | 337 | -- | Number of nodes in a bucket or a table. |
309 | type BucketCount = Int | ||
310 | type NodeCount = Int | 338 | type NodeCount = Int |
311 | 339 | ||
312 | -- | Internally, routing table is similar to list of buckets or a | 340 | -- | Internally, routing table is similar to list of buckets or a |
@@ -335,21 +363,28 @@ lookupBucket nid = go 0 | |||
335 | | otherwise = pure bucket | 363 | | otherwise = pure bucket |
336 | go _ (Tip _ _ bucket) = pure bucket | 364 | go _ (Tip _ _ bucket) = pure bucket |
337 | 365 | ||
366 | -- | Count of closest nodes in find_node request. | ||
338 | type K = Int | 367 | type K = Int |
339 | 368 | ||
340 | -- | Used in 'find_node' queries. | 369 | -- | Default 'K' is equal to 'defaultBucketSize'. |
370 | defaultK :: K | ||
371 | defaultK = 8 | ||
372 | |||
373 | -- | Get a list of /K/ closest nodes using XOR metric. Used in | ||
374 | -- 'find_node' queries. | ||
341 | kclosest :: Eq ip => K -> NodeId -> Table ip -> [NodeInfo ip] | 375 | kclosest :: Eq ip => K -> NodeId -> Table ip -> [NodeInfo ip] |
342 | kclosest k nid = L.map key . PSQ.toList . fromMaybe PSQ.empty | 376 | kclosest k nid = L.map key . PSQ.toList . fromMaybe PSQ.empty |
343 | . lookupBucket nid | 377 | . lookupBucket nid |
344 | 378 | ||
345 | coerseId :: (Serialize a, Serialize b) => a -> b | 379 | coerceId :: (Serialize a, Serialize b) => a -> b |
346 | coerseId = either (error msg) id . S.decode . S.encode | 380 | coerceId = either (error msg) id . S.decode . S.encode |
347 | where | 381 | where |
348 | msg = "coerseId: impossible" | 382 | msg = "coerceId: impossible" |
349 | 383 | ||
350 | -- | Used in 'get_peers' queries. | 384 | -- | Get a list of /K/ nodes with node id closest to the specific |
351 | kclosestHash :: Eq a => Alpha -> InfoHash -> Table a -> [NodeInfo a] | 385 | -- infohash. Used in 'get_peers' queries. |
352 | kclosestHash k nid t = kclosest k (coerseId nid) t | 386 | kclosestHash :: Eq a => K -> InfoHash -> Table a -> [NodeInfo a] |
387 | kclosestHash k nid t = kclosest k (coerceId nid) t | ||
353 | 388 | ||
354 | {----------------------------------------------------------------------- | 389 | {----------------------------------------------------------------------- |
355 | -- Routing | 390 | -- Routing |