4 files changed, 281 insertions, 0 deletions
diff --git a/src/Data/Kademlia/Common.hs b/src/Data/Kademlia/Common.hs
new file mode 100644
index 00000000..874120d8
--- /dev/null
+++ b/src/Data/Kademlia/Common.hs
@@ -0,0 +1,48 @@
+{-# OPTIONS -fno-warn-orphans #-}
+{-# LANGUAGE RecordWildCards #-}
+module Data.Kademlia.Common
+       (NodeID, NodeInfo
+       ) where
+import Control.Applicative
+import Data.ByteString
+import Network
+import Network.Socket
+import Data.Serialize
+type NodeID = ByteString
+type Distance = NodeID
+-- WARN is the 'system' random suitable for this?
+-- | Generate a random 'NodeID' to be used for the entire session.
+--   The distribution of IDs should be as uniform as possible.
+--
+--   NOTE: not implemented yet. The body is 'undefined', so running
+--   this action fails at runtime.
+--
+genNodeID :: IO NodeID
+genNodeID = undefined -- randomIO
+-- | Orphan instance: a port is read and written as an unsigned 16-bit
+--   big-endian word.
+instance Serialize PortNumber where
+  get = fmap fromIntegral getWord16be
+  put port = putWord16be (fromIntegral port)
+-- | Address of a node: a host address paired with a port.
+data NodeAddr = NodeAddr {
+    -- | Host address; the 'Serialize' instance writes it as a 32-bit
+    --   big-endian word.
+    nodeIP   :: HostAddress
+    -- | Port of the node.
+  , nodePort :: PortNumber
+  } deriving (Show, Eq)
+-- | Wire format: the host address as a 32-bit big-endian word,
+--   immediately followed by the serialized port.
+instance Serialize NodeAddr where
+  get = liftA2 NodeAddr getWord32be get
+  put (NodeAddr ip port) = putWord32be ip >> put port
+-- | A node identifier paired with the address of that node.
+data NodeInfo = NodeInfo {
+    -- | Identifier of the node; the 'Serialize' instance reads it back
+    --   as exactly 20 bytes.
+    nodeID   :: NodeID
+    -- | Address of the node.
+  , nodeAddr :: NodeAddr
+  } deriving (Show, Eq)
+-- | Wire format: the raw bytes of the node ID (exactly 20 bytes are
+--   read when decoding) followed by the serialized 'NodeAddr'.
+--
+--   The ID is written with 'putByteString' rather than 'put': the
+--   'Serialize' instance for 'ByteString' prepends a length field, which
+--   'get' (reading a bare 20-byte string) would misparse as ID bytes.
+--   An ID that is not 20 bytes long still cannot round-trip.
+instance Serialize NodeInfo where
+  get = NodeInfo <$> getByteString 20 <*> get
+  put NodeInfo {..} = putByteString nodeID >> put nodeAddr
diff --git a/src/Data/Kademlia/Routing/Bucket.hs b/src/Data/Kademlia/Routing/Bucket.hs
new file mode 100644
index 00000000..8d7f3e50
--- /dev/null
+++ b/src/Data/Kademlia/Routing/Bucket.hs
@@ -0,0 +1,139 @@
+-- |
+--   Copyright   :  (c) Sam T. 2013
+--   License     :  MIT
+--   Maintainer  :  pxqr.sta@gmail.com
+--   Stability   :  experimental
+--   Portability :  portable
+--
+--   Bucket is used to hold a bounded list of key -> value pairs.
+--
+--   Bucket is kept sorted by time last seen — the least-recently seen
+--   node at the head, the most-recently seen at the tail. Reason: when
+--   we insert a node into the bucket we first filter the nodes with a
+--   smaller lifetime, since they are more likely to leave the network,
+--   so we most likely don't reach the end of the list. This should
+--   reduce list traversal, and we don't need to reverse the list in
+--   insertion routines.
+--
+--   Bucket is also limited in its length — thus it's called a k-bucket.
+--   When a bucket becomes full we should split it into two lists by
+--   the current span bit. The span bit is defined by the depth in the
+--   routing table tree. The size of the bucket should be chosen such
+--   that it's very unlikely that all nodes in the bucket fail within an
+--   hour of each other.
+--
+{-# LANGUAGE RecordWildCards #-}
+module Data.Kademlia.Routing.Bucket
+       ( Bucket(maxSize, kvs)
+         -- * Query
+       , size, isFull, member
+         -- * Construction
+       , empty, singleton
+         -- * Modification
+       , enlarge, split, insert
+         -- * Defaults
+       , defaultBucketSize
+       ) where
+import Control.Applicative hiding (empty)
+import Data.Bits
+import Data.List as L hiding (insert)
+type Size = Int
+-- | A list of key -> value pairs together with its maximum size.
+data Bucket k v = Bucket {
+    -- | We usually use equally sized buckets throughout the routing
+    -- table, so keeping the max size in each bucket leads to redundancy.
+    -- However, it allows us to use some interesting schemes in the
+    -- route tree.
+    maxSize :: Size
+    -- | Key -> value pairs as described above.
+    --   Each key in a given bucket should be unique.
+  , kvs     :: [(k, v)]
+  }
+-- | Number of key -> value pairs currently stored in the bucket (as
+--   opposed to its capacity, 'maxSize').
+--
+--   Intended invariant: forall bucket. size bucket <= maxSize bucket
+--
+size :: Bucket k v -> Size
+size bucket = L.length (kvs bucket)
+-- | Whether the bucket has reached its capacity.
+--
+--   Uses '>=' rather than '==' so that a bucket holding more pairs than
+--   'maxSize' (e.g. after 'enlarge' with a negative amount) still counts
+--   as full. This makes it the exact negation of the
+--   @size bucket < maxSize@ test used by 'insert'.
+isFull :: Bucket k v -> Bool
+isFull Bucket {..} = L.length kvs >= maxSize
+-- | Test whether some pair in the bucket is stored under the given key.
+member :: Eq k => k -> Bucket k v -> Bool
+member key bucket = any ((== key) . fst) (kvs bucket)
+-- | Bucket without any pairs; a negative capacity is clamped to zero.
+empty :: Size -> Bucket k v
+empty capacity = Bucket { maxSize = max 0 capacity, kvs = [] }
+-- | Bucket holding exactly one pair; the capacity is raised to at
+--   least one so that the pair fits.
+singleton :: Size -> k -> v -> Bucket k v
+singleton capacity key val = Bucket
+  { maxSize = max 1 capacity
+  , kvs     = [(key, val)]
+  }
+-- | Add the given amount to the capacity of a bucket, leaving the
+--   stored pairs untouched.
+enlarge :: Size -> Bucket k v -> Bucket k v
+enlarge additional (Bucket capacity pairs) =
+    Bucket (capacity + additional) pairs
+-- | Partition a bucket by the bit of each key at the given index.
+--   The first bucket of the result receives the pairs whose key has
+--   that bit clear, the second one those whose key has it set. Both
+--   keep the capacity of the original bucket.
+split :: Bits k => Int -> Bucket k v -> (Bucket k v, Bucket k v)
+split index (Bucket capacity pairs) =
+    (Bucket capacity bitClear, Bucket capacity bitSet)
+  where
+    (bitSet, bitClear) = partition (\(key, _) -> key `testBit` index) pairs
+-- Drop the first pair stored under the key of the given pair and append
+-- the given pair at the end. A bucket without that key is left as is.
+moveToEnd :: Eq k => (k, v) -> Bucket k v -> Bucket k v
+moveToEnd kv@(k, _) b = b { kvs = relocated }
+  where
+    relocated = case break ((== k) . fst) (kvs b) of
+      (before, _ : after) -> before ++ after ++ [kv]
+      (before, [])        -> before
+-- Append a pair after all pairs already stored in the bucket. Neither
+-- the capacity nor the uniqueness of the key is checked here.
+insertToEnd :: (k, v) -> Bucket k v -> Bucket k v
+insertToEnd kv (Bucket capacity pairs) = Bucket capacity (pairs ++ [kv])
+-- | * If the info already exists in the bucket then move it to the end.
+--
+--   * If the bucket is not full then insert the info at the end.
+--
+--   * If the bucket is full then ping the least recently seen node.
+--     Here we have a choice:
+--
+--         If the node responds then move it to the end and discard
+--         the node we want to insert.
+--
+--         If not, remove it from the bucket and add the node we want
+--         to insert to the end.
+--
+--   * If the bucket cannot hold anything at all (it is empty and its
+--     'maxSize' is not positive) it is returned unchanged.
+--
+insert :: (Applicative f, Eq k)
+       => (v -> f Bool)  -- ^ Ping RPC
+       -> (k, v) -> Bucket k v -> f (Bucket k v)
+insert ping new bucket@(Bucket {..})
+    | fst new `member` bucket = pure (new `moveToEnd` bucket)
+    | size bucket < maxSize   = pure (new `insertToEnd` bucket)
+    | least : rest <- kvs     =
+      let select alive = if alive then least else new
+          mk most = Bucket maxSize (rest ++ [most])
+      in mk . select <$> ping (snd least)
+    -- Only reachable for an empty bucket with @maxSize <= 0@: there is
+    -- no room for the new pair and nobody to evict. Without this guard
+    -- the function would fail with a pattern-match error.
+    | otherwise               = pure bucket
+-- NOTE(review): not implemented yet — the body is 'undefined'.
+-- Presumably meant to find the value stored under a key; confirm.
+lookup :: k -> Bucket k v -> Maybe v
+lookup = undefined
+-- NOTE(review): not implemented yet — the body is 'undefined'.
+-- Presumably meant to return up to the given number of pairs closest
+-- to the given key; confirm.
+closest :: Int -> k -> Bucket k v -> [(k, v)]
+closest = undefined
+-- | Default maximum bucket size. Most clients use this value for the
+--   maximum bucket size.
+defaultBucketSize :: Int
+defaultBucketSize = 20
diff --git a/src/Data/Kademlia/Routing/Table.hs b/src/Data/Kademlia/Routing/Table.hs
new file mode 100644
index 00000000..b79a0a31
--- /dev/null
+++ b/src/Data/Kademlia/Routing/Table.hs
@@ -0,0 +1,38 @@
+-- |
+--   Copyright   :  (c) Sam T. 2013
+--   License     :  MIT
+--   Maintainer  :  pxqr.sta@gmail.com
+--   Stability   :  experimental
+--   Portability :  portable
+--
+--   Routing table used for lookups. Internally it uses an unbalanced tree.
+--
+-- TODO write module synopsis
+module Data.Kademlia.Routing.Table
+       ( Table(nodeID)
+       ) where
+import Control.Applicative
+import Data.List as L
+import Data.Maybe
+import Data.Kademlia.Routing.Tree
+-- | Routing table: a route tree together with lookup settings.
+data Table k v = Table {
+    -- | Tree holding the key -> value pairs of the table.
+    routeTree     :: Tree k v
+    -- | Set degree of parallelism in node lookup calls.
+  , alpha         :: Int
+    -- | NOTE(review): presumably the ID of our own node — confirm.
+  , nodeID        :: k
+  }
+--insert :: NodeID -> Table -> Table
+--insert x t = undefined
+--closest :: InfoHash -> Table -> [NodeID]
+--closest = undefined
+-- TODO table serialization: usually we need to save table between
+-- target program executions for bootstrapping
diff --git a/src/Data/Kademlia/Routing/Tree.hs b/src/Data/Kademlia/Routing/Tree.hs
new file mode 100644
index 00000000..522bb0c2
--- /dev/null
+++ b/src/Data/Kademlia/Routing/Tree.hs
@@ -0,0 +1,56 @@
+-- |
+--   Copyright   :  (c) Sam T. 2013
+--   License     :  MIT
+--   Maintainer  :  pxqr.sta@gmail.com
+--   Stability   :  experimental
+--   Portability :  portable
+--
+--   Routing tree should contain key -> value pairs in this way:
+--
+--     * More keys that are near to our node key, and fewer keys that
+--     are far from our node key.
+--
+--     * Tree might be saturated. If this happens we can only update
+--     buckets, but we can't add new buckets.
+--
+--   Instead of using an ordinary binary tree and keeping track of
+--   whether it follows the restrictions above (that's somewhat
+--   non-trivial) we store distance -> value keys. This leads to a
+--   simple data structure that is actually isomorphic to a non-empty
+--   list. So we first map our keys to distances using our node ID and
+--   store them in the tree. When we need to extract a pair we map
+--   distances back to keys, again using our node ID. This
+--   normalization happens in the routing table.
+--
+module Data.Kademlia.Routing.Tree
+       ( Tree, empty, insert
+       ) where
+import Control.Applicative hiding (empty)
+import Data.Bits
+import Data.Kademlia.Routing.Bucket (Bucket, split, isFull)
+import qualified Data.Kademlia.Routing.Bucket as Bucket
+-- | Routing tree: a spine of buckets that always ends in a 'Tip'.
+data Tree k v
+  = Tip (Bucket k v)
+    -- ^ End of the spine: a single bucket.
+  | Bin (Tree k v)   (Bucket k v)
+    -- ^ The rest of the spine plus the bucket of this level. 'insert'
+    --   sends keys whose bit at this depth is set to the bucket and
+    --   all other keys into the subtree.
+-- | Tree made of a single empty bucket with the given maximum size.
+empty :: Int -> Tree k v
+empty bucketSize = Tip (Bucket.empty bucketSize)
+-- | Insert a key -> value pair into the tree.
+--
+--   The pair descends the spine by testing the bits of its key, starting
+--   at bit 0: at every 'Bin' a set bit selects the bucket of that level,
+--   a clear bit continues into the subtree.
+--
+--   When the final 'Tip' bucket is full it is split by the current bit
+--   and the pair is inserted into the half it belongs to, instead of
+--   being dropped. If that half is itself still full, 'Bucket.insert'
+--   decides (using the ping action) whether the new pair replaces the
+--   least recently seen one. Only one split happens per call, so the
+--   function always terminates.
+insert :: (Applicative f, Bits k)
+       => (v -> f Bool)  -- ^ Ping RPC, handed on to 'Bucket.insert'
+       -> (k, v) -> Tree k v -> f (Tree k v)
+insert ping (k, v) = go 0
+  where
+    go n (Tip bucket)
+      | isFull bucket, (near, far) <- split n bucket
+                          = if k `testBit` n
+                              then Bin (Tip near) <$> intoBucket far
+                              else (\near' -> Tip near' `Bin` far)
+                                     <$> intoBucket near
+      |     otherwise     = Tip <$> intoBucket bucket
+    go n (Bin near far)
+      | k `testBit` n = Bin near <$> intoBucket far
+      | otherwise     = (`Bin` far) <$> go (succ n) near
+
+    -- Bucket-level insertion of the pair being added.
+    intoBucket = Bucket.insert ping (k, v)

diff --git a/src/Data/Kademlia/Common.hs b/src/Data/Kademlia/Common.hs new file mode 100644 index 00000000..874120d8 --- /dev/null +++ b/src/Data/Kademlia/Common.hs
@@ -0,0 +1,48 @@
	1	{-# OPTIONS -fno-warn-orphans #-}
	2	{-# LANGUAGE RecordWildCards #-}
	3	module Data.Kademlia.Common
	4	(NodeID, NodeInfo
	5	) where
	6
	7	import Control.Applicative
	8	import Data.ByteString
	9	import Network
	10	import Network.Socket
	11	import Data.Serialize
	12
	13
	14	type NodeID = ByteString
	15	type Distance = NodeID
	16
	17	-- WARN is the 'system' random suitable for this?
	18	-- \| Generate random NodeID used for the entire session.
	19	-- Distribution of ID's should be as uniform as possible.
	20	--
	21	genNodeID :: IO NodeID
	22	genNodeID = undefined -- randomIO
	23
	24	instance Serialize PortNumber where
	25	get = fromIntegral <$> getWord16be
	26	put = putWord16be . fromIntegral
	27
	28
	29	data NodeAddr = NodeAddr {
	30	nodeIP :: HostAddress
	31	, nodePort :: PortNumber
	32	} deriving (Show, Eq)
	33
	34	instance Serialize NodeAddr where
	35	get = NodeAddr <$> getWord32be <*> get
	36	put NodeAddr {..} = do
	37	putWord32be nodeIP
	38	put nodePort
	39
	40
	41	data NodeInfo = NodeInfo {
	42	nodeID :: NodeID
	43	, nodeAddr :: NodeAddr
	44	} deriving (Show, Eq)
	45
	46	instance Serialize NodeInfo where
	47	get = NodeInfo <$> getByteString 20 <*> get
	48	put NodeInfo {..} = put nodeID >> put nodeAddr


diff --git a/src/Data/Kademlia/Routing/Bucket.hs b/src/Data/Kademlia/Routing/Bucket.hs new file mode 100644 index 00000000..8d7f3e50 --- /dev/null +++ b/src/Data/Kademlia/Routing/Bucket.hs
@@ -0,0 +1,139 @@
	1	-- \|
	2	-- Copyright : (c) Sam T. 2013
	3	-- License : MIT
	4	-- Maintainer : pxqr.sta@gmail.com
	5	-- Stability : experimental
	6	-- Portability : portable
	7	--
	8	-- Bucket is used to
	9	--
	10	-- Bucket is kept sorted by time last seen — least-recently seen
	11	-- node at the head, most-recently seen at the tail. Reason: when we
	12	-- insert a node into the bucket we first filter nodes with smaller
	13	-- lifetime since they more likely leave network and we more likely
	14	-- don't reach list end. This should reduce list traversal, we don't
	15	-- need to reverse list in insertion routines.
	16	--
	17	-- Bucket is also limited in its length — thus it's called k-bucket.
	18	-- When bucket becomes full we should split it in two lists by
	19	-- current span bit. Span bit is defined by depth in the routing
	20	-- table tree. Size of the bucket should be chosen such that it's
	21	-- very unlikely that all nodes in bucket fail within an hour of
	22	-- each other.
	23	--
	24	{-# LANGUAGE RecordWildCards #-}
	25	module Data.Kademlia.Routing.Bucket
	26	( Bucket(maxSize, kvs)
	27
	28	-- * Query
	29	, size, isFull, member
	30
	31	-- * Construction
	32	, empty, singleton
	33
	34	-- * Modification
	35	, enlarge, split, insert
	36
	37	-- * Defaults
	38	, defaultBucketSize
	39	) where
	40
	41	import Control.Applicative hiding (empty)
	42	import Data.Bits
	43	import Data.List as L hiding (insert)
	44
	45
	46	type Size = Int
	47
	48	data Bucket k v = Bucket {
	49	-- \| We usually use equally sized buckets in the all routing table
	50	-- so keeping max size in each bucket leads to redundancy. Although
	51	-- it allow us to use some interesting schemes in route tree.
	52	maxSize :: Size
	53
	54	-- \| Key -> value pairs as described above.
	55	-- Each key in a given bucket should be unique.
	56	, kvs :: [(k, v)]
	57	}
	58
	59	-- \| Gives /current/ size of bucket.
	60	--
	61	-- forall bucket. size bucket <= maxSize bucket
	62	--
	63	size :: Bucket k v -> Size
	64	size = L.length . kvs
	65
	66	isFull :: Bucket k v -> Bool
	67	isFull Bucket {..} = L.length kvs == maxSize
	68
	69	member :: Eq k => k -> Bucket k v -> Bool
	70	member k = elem k . map fst . kvs
	71
	72	empty :: Size -> Bucket k v
	73	empty s = Bucket (max 0 s) []
	74
	75	singleton :: Size -> k -> v -> Bucket k v
	76	singleton s k v = Bucket (max 1 s) [(k, v)]
	77
	78
	79	-- \| Increase size of a given bucket.
	80	enlarge :: Size -> Bucket k v -> Bucket k v
	81	enlarge additional b = b { maxSize = maxSize b + additional }
	82
	83	split :: Bits k => Int -> Bucket k v -> (Bucket k v, Bucket k v)
	84	split index Bucket {..} =
	85	let (far, near) = partition spanBit kvs
	86	in (Bucket maxSize near, Bucket maxSize far)
	87	where
	88	spanBit = (`testBit` index) . fst
	89
	90
	91	-- move elem to the end in one traversal
	92	moveToEnd :: Eq k => (k, v) -> Bucket k v -> Bucket k v
	93	moveToEnd kv@(k, _) b = b { kvs = go (kvs b) }
	94	where
	95	go [] = []
	96	go (x : xs)
	97	\| fst x == k = xs ++ [kv]
	98	\| otherwise = x : go xs
	99
	100	insertToEnd :: (k, v) -> Bucket k v -> Bucket k v
	101	insertToEnd kv b = b { kvs = kvs b ++ [kv] }
	102
	103	-- \| * If the info already exists in bucket then move it to the end.
	104	--
	105	-- * If bucket is not full then insert the info to the end.
	106	--
	107	-- * If bucket is full then ping the least recently seen node.
	108	-- Here we have a choice:
	109	--
	110	-- If node respond then move it the end and discard node
	111	-- we want to insert.
	112	--
	113	-- If not remove it from the bucket and add the
	114	-- (we want to insert) node to the end.
	115	--
	116	insert :: Applicative f => Eq k
	117	=> (v -> f Bool) -- ^ Ping RPC
	118	-> (k, v) -> Bucket k v -> f (Bucket k v)
	119
	120	insert ping new bucket@(Bucket {..})
	121	\| fst new `member` bucket = pure (new `moveToEnd` bucket)
	122	\| size bucket < maxSize = pure (new `insertToEnd` bucket)
	123	\| least : rest <- kvs =
	124	let select alive = if alive then least else new
	125	mk most = Bucket maxSize (rest ++ [most])
	126	in mk . select <$> ping (snd least)
	127	where
	128	-- \| otherwise = pure bucket
	129	-- WARN: or maybe error "insertBucket: max size should not be 0" ?
	130
	131	lookup :: k -> Bucket k v -> Maybe v
	132	lookup = undefined
	133
	134	closest :: Int -> k -> Bucket k v -> [(k, v)]
	135	closest = undefined
	136
	137	-- \| Most clients use this value for maximum bucket size.
	138	defaultBucketSize :: Int
	139	defaultBucketSize = 20


diff --git a/src/Data/Kademlia/Routing/Table.hs b/src/Data/Kademlia/Routing/Table.hs new file mode 100644 index 00000000..b79a0a31 --- /dev/null +++ b/src/Data/Kademlia/Routing/Table.hs
@@ -0,0 +1,38 @@
	1	-- \|
	2	-- Copyright : (c) Sam T. 2013
	3	-- License : MIT
	4	-- Maintainer : pxqr.sta@gmail.com
	5	-- Stability : experimental
	6	-- Portability : portable
	7	--
	8	-- Routing table used for lookups. Internally it uses an unbalanced tree
	9	--
	10	-- TODO write module synopsis
	11	module Data.Kademlia.Routing.Table
	12	( Table(nodeID)
	13	) where
	14
	15	import Control.Applicative
	16	import Data.List as L
	17	import Data.Maybe
	18
	19	import Data.Kademlia.Routing.Tree
	20
	21
	22	data Table k v = Table {
	23	routeTree :: Tree k v
	24
	25	-- \| Set degree of parallelism in node lookup calls.
	26	, alpha :: Int
	27	, nodeID :: k
	28	}
	29
	30	--insert :: NodeID -> Table -> Table
	31	--insert x t = undefined
	32
	33	--closest :: InfoHash -> Table -> [NodeID]
	34	--closest = undefined
	35
	36
	37	-- TODO table serialization: usually we need to save table between
	38	-- target program executions for bootstrapping


diff --git a/src/Data/Kademlia/Routing/Tree.hs b/src/Data/Kademlia/Routing/Tree.hs new file mode 100644 index 00000000..522bb0c2 --- /dev/null +++ b/src/Data/Kademlia/Routing/Tree.hs
@@ -0,0 +1,56 @@
	1	-- \|
	2	-- Copyright : (c) Sam T. 2013
	3	-- License : MIT
	4	-- Maintainer : pxqr.sta@gmail.com
	5	-- Stability : experimental
	6	-- Portability : portable
	7	--
	8	-- Routing tree should contain key -> value pairs in this way:
	9	--
	10	-- * More keys that near to our node key, and less keys that far
	11	-- from our node key.
	12	--
	13	-- * Tree might be saturated. If this happen we can only update
	14	-- buckets, but we can't add new buckets.
	15	--
	16	-- Instead of using ordinary binary tree and keep track is it
	17	-- following restrictions above (that's somewhat non-trivial) we
	18	-- store distance -> value keys. This lead to simple data structure
	19	-- that actually isomorphic to non-empty list. So we first map our
	20	-- keys to distances using our node ID and store them in tree. When
	21	-- we need to extract a pair we map distances to keys back, again
	22	-- using our node ID. This normalization happen in routing table.
	23	--
	24	module Data.Kademlia.Routing.Tree
	25	( Tree, empty, insert
	26	) where
	27
	28	import Control.Applicative hiding (empty)
	29	import Data.Bits
	30
	31	import Data.Kademlia.Routing.Bucket (Bucket, split, isFull)
	32	import qualified Data.Kademlia.Routing.Bucket as Bucket
	33
	34
	35
	36	data Tree k v
	37	= Tip (Bucket k v)
	38	\| Bin (Tree k v) (Bucket k v)
	39
	40	empty :: Int -> Tree k v
	41	empty = Tip . Bucket.empty
	42
	43	insert :: Applicative f
	44	=> Bits k
	45	=> (v -> f Bool)
	46	-> (k, v) -> Tree k v -> f (Tree k v)
	47	insert ping (k, v) = go 0
	48	where
	49	go n (Tip bucket)
	50	\| isFull bucket, (near, far) <- split n bucket
	51	= pure (Tip near `Bin` far)
	52	\| otherwise = Tip <$> Bucket.insert ping (k, v) bucket
	53
	54	go n (Bin near far)
	55	\| k `testBit` n = Bin <$> pure near <*> Bucket.insert ping (k, v) far
	56	\| otherwise = Bin <$> go (succ n) near <*> pure far