From 086e81d141c0e2435563b2b59e8065dcd27ce5a2 Mon Sep 17 00:00:00 2001 From: Sam T Date: Thu, 20 Jun 2013 23:10:06 +0400 Subject: ~ Merge kademlia package. --- src/Data/Kademlia/Common.hs | 48 +++++++++++++ src/Data/Kademlia/Routing/Bucket.hs | 139 ++++++++++++++++++++++++++++++++++++ src/Data/Kademlia/Routing/Table.hs | 38 ++++++++++ src/Data/Kademlia/Routing/Tree.hs | 56 +++++++++++++++ 4 files changed, 281 insertions(+) create mode 100644 src/Data/Kademlia/Common.hs create mode 100644 src/Data/Kademlia/Routing/Bucket.hs create mode 100644 src/Data/Kademlia/Routing/Table.hs create mode 100644 src/Data/Kademlia/Routing/Tree.hs (limited to 'src/Data/Kademlia') diff --git a/src/Data/Kademlia/Common.hs b/src/Data/Kademlia/Common.hs new file mode 100644 index 00000000..874120d8 --- /dev/null +++ b/src/Data/Kademlia/Common.hs @@ -0,0 +1,48 @@ +{-# OPTIONS -fno-warn-orphans #-} +{-# LANGUAGE RecordWildCards #-} +module Data.Kademlia.Common + (NodeID, NodeInfo + ) where + +import Control.Applicative +import Data.ByteString +import Network +import Network.Socket +import Data.Serialize + + +type NodeID = ByteString +type Distance = NodeID + +-- WARN is the 'system' random suitable for this? +-- | Generate random NodeID used for the entire session. +-- Distribution of ID's should be as uniform as possible. +-- +genNodeID :: IO NodeID +genNodeID = undefined -- randomIO + +instance Serialize PortNumber where + get = fromIntegral <$> getWord16be + put = putWord16be . fromIntegral + + +data NodeAddr = NodeAddr { + nodeIP :: HostAddress + , nodePort :: PortNumber + } deriving (Show, Eq) + +instance Serialize NodeAddr where + get = NodeAddr <$> getWord32be <*> get + put NodeAddr {..} = do + putWord32be nodeIP + put nodePort + + +data NodeInfo = NodeInfo { + nodeID :: NodeID + , nodeAddr :: NodeAddr + } deriving (Show, Eq) + +instance Serialize NodeInfo where + get = NodeInfo <$> getByteString 20 <*> get + put NodeInfo {..} = put nodeID >> put nodeAddr diff --git a/src/Data/Kademlia/Routing/Bucket.hs b/src/Data/Kademlia/Routing/Bucket.hs new file mode 100644 index 00000000..8d7f3e50 --- /dev/null +++ b/src/Data/Kademlia/Routing/Bucket.hs @@ -0,0 +1,139 @@ +-- | +-- Copyright : (c) Sam T. 2013 +-- License : MIT +-- Maintainer : pxqr.sta@gmail.com +-- Stability : experimental +-- Portability : portable +-- +-- Bucket is used to +-- +-- Bucket is kept sorted by time last seen — least-recently seen +-- node at the head, most-recently seen at the tail. Reason: when we +-- insert a node into the bucket we first filter nodes with smaller +-- lifetime since they more likely leave network and we more likely +-- don't reach list end. This should reduce list traversal, we don't +-- need to reverse list in insertion routines. +-- +-- Bucket is also limited in its length — thus it's called k-bucket. +-- When bucket becomes full we should split it in two lists by +-- current span bit. Span bit is defined by depth in the routing +-- table tree. Size of the bucket should be choosen such that it's +-- very unlikely that all nodes in bucket fail within an hour of +-- each other. +-- +{-# LANGUAGE RecordWildCards #-} +module Data.Kademlia.Routing.Bucket + ( Bucket(maxSize, kvs) + + -- * Query + , size, isFull, member + + -- * Construction + , empty, singleton + + -- * Modification + , enlarge, split, insert + + -- * Defaults + , defaultBucketSize + ) where + +import Control.Applicative hiding (empty) +import Data.Bits +import Data.List as L hiding (insert) + + +type Size = Int + +data Bucket k v = Bucket { + -- | We usually use equally sized buckets in the all routing table + -- so keeping max size in each bucket lead to redundancy. Altrough + -- it allow us to use some interesting schemes in route tree. + maxSize :: Size + + -- | Key -> value pairs as described above. + -- Each key in a given bucket should be unique. + , kvs :: [(k, v)] + } + +-- | Gives /current/ size of bucket. +-- +-- forall bucket. size bucket <= maxSize bucket +-- +size :: Bucket k v -> Size +size = L.length . kvs + +isFull :: Bucket k v -> Bool +isFull Bucket {..} = L.length kvs == maxSize + +member :: Eq k => k -> Bucket k v -> Bool +member k = elem k . map fst . kvs + +empty :: Size -> Bucket k v +empty s = Bucket (max 0 s) [] + +singleton :: Size -> k -> v -> Bucket k v +singleton s k v = Bucket (max 1 s) [(k, v)] + + +-- | Increase size of a given bucket. +enlarge :: Size -> Bucket k v -> Bucket k v +enlarge additional b = b { maxSize = maxSize b + additional } + +split :: Bits k => Int -> Bucket k v -> (Bucket k v, Bucket k v) +split index Bucket {..} = + let (far, near) = partition spanBit kvs + in (Bucket maxSize near, Bucket maxSize far) + where + spanBit = (`testBit` index) . fst + + +-- move elem to the end in one traversal +moveToEnd :: Eq k => (k, v) -> Bucket k v -> Bucket k v +moveToEnd kv@(k, _) b = b { kvs = go (kvs b) } + where + go [] = [] + go (x : xs) + | fst x == k = xs ++ [kv] + | otherwise = x : go xs + +insertToEnd :: (k, v) -> Bucket k v -> Bucket k v +insertToEnd kv b = b { kvs = kvs b ++ [kv] } + +-- | * If the info already exists in bucket then move it to the end. +-- +-- * If bucket is not full then insert the info to the end. +-- +-- * If bucket is full then ping the least recently seen node. +-- Here we have a choice: +-- +-- If node respond then move it the end and discard node +-- we want to insert. +-- +-- If not remove it from the bucket and add the +-- (we want to insert) node to the end. +-- +insert :: Applicative f => Eq k + => (v -> f Bool) -- ^ Ping RPC + -> (k, v) -> Bucket k v -> f (Bucket k v) + +insert ping new bucket@(Bucket {..}) + | fst new `member` bucket = pure (new `moveToEnd` bucket) + | size bucket < maxSize = pure (new `insertToEnd` bucket) + | least : rest <- kvs = + let select alive = if alive then least else new + mk most = Bucket maxSize (rest ++ [most]) + in mk . select <$> ping (snd least) + where +-- | otherwise = pure bucket + -- WARN: or maybe error "insertBucket: max size should not be 0" ? + +lookup :: k -> Bucket k v -> Maybe v +lookup = undefined + +closest :: Int -> k -> Bucket k v -> [(k, v)] +closest = undefined + +-- | Most clients use this value for maximum bucket size. +defaultBucketSize :: Int +defaultBucketSize = 20 diff --git a/src/Data/Kademlia/Routing/Table.hs b/src/Data/Kademlia/Routing/Table.hs new file mode 100644 index 00000000..b79a0a31 --- /dev/null +++ b/src/Data/Kademlia/Routing/Table.hs @@ -0,0 +1,38 @@ +-- | +-- Copyright : (c) Sam T. 2013 +-- License : MIT +-- Maintainer : pxqr.sta@gmail.com +-- Stability : experimental +-- Portability : portable +-- +-- Routing table used to lookup . Internally it uses not balanced tree +-- +-- TODO write module synopsis +module Data.Kademlia.Routing.Table + ( Table(nodeID) + ) where + +import Control.Applicative +import Data.List as L +import Data.Maybe + +import Data.Kademlia.Routing.Tree + + +data Table k v = Table { + routeTree :: Tree k v + + -- | Set degree of parallelism in node lookup calls. + , alpha :: Int + , nodeID :: k + } + +--insert :: NodeID -> Table -> Table +--insert x t = undefined + +--closest :: InfoHash -> Table -> [NodeID] +--closest = undefined + + +-- TODO table serialization: usually we need to save table between +-- target program executions for bootstrapping diff --git a/src/Data/Kademlia/Routing/Tree.hs b/src/Data/Kademlia/Routing/Tree.hs new file mode 100644 index 00000000..522bb0c2 --- /dev/null +++ b/src/Data/Kademlia/Routing/Tree.hs @@ -0,0 +1,56 @@ +-- | +-- Copyright : (c) Sam T. 2013 +-- License : MIT +-- Maintainer : pxqr.sta@gmail.com +-- Stability : experimental +-- Portability : portable +-- +-- Routing tree should contain key -> value pairs in this way: +-- +-- * More keys that near to our node key, and less keys that far +-- from our node key. +-- +-- * Tree might be saturated. If this happen we can only update +-- buckets, but we can't add new buckets. +-- +-- Instead of using ordinary binary tree and keep track is it +-- following restrictions above (that's somewhat non-trivial) we +-- store distance -> value keys. This lead to simple data structure +-- that actually isomorphic to non-empty list. So we first map our +-- keys to distances using our node ID and store them in tree. When +-- we need to extract a pair we map distances to keys back, again +-- using our node ID. This normalization happen in routing table. +-- +module Data.Kademlia.Routing.Tree + ( Tree, empty, insert + ) where + +import Control.Applicative hiding (empty) +import Data.Bits + +import Data.Kademlia.Routing.Bucket (Bucket, split, isFull) +import qualified Data.Kademlia.Routing.Bucket as Bucket + + + +data Tree k v + = Tip (Bucket k v) + | Bin (Tree k v) (Bucket k v) + +empty :: Int -> Tree k v +empty = Tip . Bucket.empty + +insert :: Applicative f + => Bits k + => (v -> f Bool) + -> (k, v) -> Tree k v -> f (Tree k v) +insert ping (k, v) = go 0 + where + go n (Tip bucket) + | isFull bucket, (near, far) <- split n bucket + = pure (Tip near `Bin` far) + | otherwise = Tip <$> Bucket.insert ping (k, v) bucket + + go n (Bin near far) + | k `testBit` n = Bin <$> pure near <*> Bucket.insert ping (k, v) far + | otherwise = Bin <$> go (succ n) near <*> pure far -- cgit v1.2.3