-- |
--   Copyright   :  (c) Sam T. 2013
--   License     :  MIT
--   Maintainer  :  pxqr.sta@gmail.com
--   Stability   :  experimental
--   Portability :  non-portable
--
--   This module provides a high-level API for peer-to-tracker
--   communication. The tracker is used to discover other peers in the
--   network.
--
--   By convention, most trackers support another form of request that
--   queries the state of a given torrent (or all torrents) the tracker
--   is managing. This module also provides a way to easily request
--   scrape info for a particular list of torrents.
--
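--   A minimal usage sketch (illustrative only; the initial 'Progress'
--   value, @peerId@ and @port@ are assumed to be supplied by the
--   caller):
--
--   > demo :: Torrent -> PeerID -> PortNumber -> Progress -> IO ()
--   > demo torrent peerId port initialProgress =
--   >     withTracker initialProgress (tconnection torrent peerId port) $ \se ->
--   >         forever $ do
--   >             addr <- getPeerAddr se  -- blocks until a peer is available
--   >             print addr              -- hand the address to a peer worker
--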
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Network.BitTorrent.Tracker
       ( withTracker, completedReq

         -- * Connection
       , TConnection(..), tconnection

         -- * Session
       , TSession
       , getPeerAddr
       , getProgress, waitInterval

         -- * Re-export
       , defaultPorts

         -- * Scrape
       , ScrapeInfo(..), Scrape
       , scrapeURL
       , scrape, scrapeOne
       ) where

import Control.Applicative
import Control.Concurrent
import Control.Concurrent.BoundedChan as BC
import Control.Concurrent.STM
import Control.Exception
import Control.Monad
import Data.BEncode
import           Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import Data.List as L
import           Data.Map (Map)
import qualified Data.Map as M
import Data.Monoid
import Data.IORef

import Network
import Network.HTTP
import Network.URI

import Data.Torrent
import Network.BitTorrent.Internal
import Network.BitTorrent.Peer
import Network.BitTorrent.Tracker.Protocol

{-----------------------------------------------------------------------
    Tracker connection
-----------------------------------------------------------------------}

-- | 'TConnection' (shorthand for Tracker connection) combines the
-- tracker request fields necessary for tracker, torrent and client
-- identification.
--
--   This data is considered static within one session.
--
data TConnection = TConnection {
    tconnAnnounce :: URI        -- ^ Announce URL.
  , tconnInfoHash :: InfoHash   -- ^ Hash of info part of current .torrent file.
  , tconnPeerID   :: PeerID     -- ^ Client peer ID.
  , tconnPort     :: PortNumber -- ^ The port number the client is listening on.
  } deriving Show

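-- | Build a 'TConnection' from the torrent metainfo, the client peer ID
--   and the port the client listens on, e.g.
--
--   > conn = tconnection torrent peerId 6881
--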
tconnection :: Torrent -> PeerID -> PortNumber -> TConnection
tconnection t = TConnection (tAnnounce t) (tInfoHash t)


-- | used to avoid boilerplate; do NOT export me
genericReq :: TConnection -> Progress -> TRequest
genericReq ses pr = TRequest {
    reqAnnounce   = tconnAnnounce ses
  , reqInfoHash   = tconnInfoHash ses
  , reqPeerID     = tconnPeerID   ses
  , reqPort       = tconnPort     ses

  , reqUploaded   = prUploaded   pr
  , reqDownloaded = prDownloaded pr
  , reqLeft       = prLeft       pr

  , reqIP         = Nothing
  , reqNumWant    = Nothing
  , reqEvent      = Nothing
  }


-- | The first request sent to the tracker should be 'startedReq'. It
--   includes the mandatory 'Started' event field.
--
startedReq :: TConnection -> Progress -> TRequest
startedReq ses pr = (genericReq ses pr) {
    reqIP         = Nothing
  , reqNumWant    = Just defaultNumWant
  , reqEvent      = Just Started
  }

-- | A regular request must be sent periodically to fetch new peers and
--   to notify the tracker about the current state of the client, so
--   that new peers can connect to the client.
--
regularReq :: Int -> TConnection -> Progress -> TRequest
regularReq numWant ses pr = (genericReq ses pr) {
    reqIP         = Nothing
  , reqNumWant    = Just numWant
  , reqEvent      = Nothing
  }

-- | Must be sent to the tracker if the client is shutting down
-- gracefully.
--
stoppedReq :: TConnection -> Progress -> TRequest
stoppedReq ses pr = (genericReq ses pr) {
    reqIP         = Nothing
  , reqNumWant    = Nothing
  , reqEvent      = Just Stopped
  }

-- | Must be sent to the tracker when the download completes.
-- However, it must not be sent if the download was already 100%
-- complete.
--
completedReq :: TConnection -> Progress -> TRequest
completedReq ses pr = (genericReq ses pr) {
    reqIP         = Nothing
  , reqNumWant    = Nothing
  , reqEvent      = Just Completed
  }
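
-- Typical announce lifecycle (sketch): 'startedReq' once at startup,
-- 'regularReq' on every interval, 'completedReq' when the download
-- finishes, and 'stoppedReq' on graceful shutdown.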

{-----------------------------------------------------------------------
    Tracker session
-----------------------------------------------------------------------}

{-  Why use BoundedChan?

Most of the time we only need a batch of peers at the start; after that
we take peers little by little. On the other hand, the tracker gives us
a roughly constant number of peers on every announce, so an unbounded
channel would grow over time. To avoid space leaks and long lists of
peers (which we don't need) we use a bounded channel.

   Chan size.

Should be at least (count_of_workers * 2) to accumulate a long enough
peer list.

  Order of peers in chan.

Old peers are at the head, new ones at the tail. Old peers should be
used first because, statistically, they are more likely to remain in
the network longer than new ones.

-}

type TimeInterval = Int

data TSession = TSession {
  -- TODO synchronize progress with client session
    seProgress   :: TVar Progress
  , seInterval   :: IORef TimeInterval
  , sePeers      :: BoundedChan PeerAddr
  }

type PeerCount = Int

defaultChanSize :: PeerCount
defaultChanSize = defaultNumWant * 2

getPeerAddr :: TSession -> IO PeerAddr
getPeerAddr = BC.readChan . sePeers

getProgress :: TSession -> IO Progress
getProgress = readTVarIO . seProgress

newSession :: PeerCount -> Progress -> TimeInterval -> [PeerAddr]
           -> IO TSession
newSession chanSize pr i ps
  | chanSize < 1
  = throwIO $ userError "channel size must be at least 1"

  | otherwise = do
    chan <- newBoundedChan chanSize

    -- if the length of "ps" is greater than "chanSize" we would block
    -- forever; to avoid this we drop the excess peers
    let ps' = take chanSize ps
    BC.writeList2Chan chan ps'

    TSession <$> newTVarIO pr
             <*> newIORef i
             <*> pure chan

waitInterval :: TSession -> IO ()
waitInterval se @ TSession {..} = do
    delay <- readIORef seInterval
    threadDelay (delay * sec)
  where
    sec = 1000 * 1000 :: Int

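-- | Connect to the tracker: send the 'Started' announce, run the given
--   action with a fresh 'TSession', re-announce periodically in a
--   background thread, and send the 'Stopped' announce when the action
--   finishes or throws an exception.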
withTracker :: Progress -> TConnection -> (TSession -> IO a) -> IO a
withTracker initProgress conn action = bracket start end (action . fst)
  where
    start = do
      resp <- askTracker (startedReq conn initProgress)
      se   <- newSession defaultChanSize initProgress
                         (respInterval resp) (respPeers resp)

      tid  <- forkIO (syncSession se)
      return (se, tid)

    syncSession se @ TSession {..} = forever $ do
        waitInterval se
        pr   <- getProgress se
        resp <- tryJust isIOException $ do
                    askTracker (regularReq defaultNumWant conn pr)
        case resp of
          Right (OK {..}) -> do
            writeIORef seInterval respInterval

            -- 'L.union' is left-biased: elements of the first list keep
            -- their order, which implements the "old peers in head"
            -- heuristic
            old <- BC.getChanContents sePeers
            let new = respPeers
            let combined = L.union old new

            BC.writeList2Chan sePeers combined

          _ -> return ()
      where
        isIOException :: IOException -> Maybe IOException
        isIOException = return

    end (se, tid) = do
      killThread tid
      pr <- getProgress se
      leaveTracker $ stoppedReq conn pr

{-----------------------------------------------------------------------
    Scrape
-----------------------------------------------------------------------}


-- | Information about a particular torrent.
data ScrapeInfo = ScrapeInfo {
    siComplete   :: Int
    -- ^ Number of seeders - peers with the entire file.
  , siDownloaded :: Int
    -- ^ Total number of times the tracker has registered a completion.
  , siIncomplete :: Int
    -- ^ Number of leechers.
  , siName       :: Maybe ByteString
    -- ^ Name of the torrent file, as specified by the "name"
    --   field in the info section of the .torrent file.
  } deriving (Show, Eq)

-- | Scrape info about a set of torrents.
type Scrape = Map InfoHash ScrapeInfo

instance BEncodable ScrapeInfo where
  toBEncode si = fromAssocs
    [ "complete"   -->  siComplete si
    , "downloaded" -->  siDownloaded si
    , "incomplete" -->  siIncomplete si
    , "name"       -->? siName si
    ]

  fromBEncode (BDict d) =
    ScrapeInfo <$> d >--  "complete"
               <*> d >--  "downloaded"
               <*> d >--  "incomplete"
               <*> d >--? "name"
  fromBEncode _ = decodingError "ScrapeInfo"

-- | Try to convert an /announce/ URL to a /scrape/ URL. If 'scrapeURL'
--   gives 'Nothing' then the tracker does not support scraping. The
--   info hash list is used to restrict the tracker's report to those
--   particular torrents. Note that scraping multiple torrents may not
--   be supported, even if the scrape convention itself is.
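--
--   For example (sketch of the rewrite rule): a path whose last segment
--   starts with @announce@, such as @/announce@ or @/x/announce.php@, is
--   rewritten to @/scrape@ or @/x/scrape.php@ respectively, and the given
--   info hashes are appended to the resulting URI; a path like @/a@
--   yields 'Nothing'.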
--
scrapeURL :: URI -> [InfoHash] -> Maybe URI
scrapeURL uri ihs = do
  newPath <- replace (BC.pack (uriPath uri))
  let newURI = uri { uriPath = BC.unpack newPath }
  return (foldl addHashToURI newURI ihs)
 where
    replace :: ByteString -> Maybe ByteString
    replace p
      | ps <- BC.splitWith (== '/') p
      , "announce" `B.isPrefixOf` last ps
      = let newSuff = "scrape" <> B.drop (B.length "announce") (last ps)
        in Just (B.intercalate "/" (init ps ++ [newSuff]))
      | otherwise = Nothing


-- | Request scrape info from the tracker for each 'InfoHash' in the
--   list. If the info hash list is 'null', the tracker should list
--   all available torrents.
--   Note that the 'URI' should be an /announce/ URI, not a /scrape/ URI.
--
scrape :: URI                -- ^ Announce 'URI'.
       -> [InfoHash]         -- ^ Torrents to be scraped.
       -> IO (Result Scrape) -- ^ 'ScrapeInfo' for each torrent.
scrape announce ihs
  | Just uri <- scrapeURL announce ihs = do
    rawResp  <- simpleHTTP (Request uri GET [] "")
    respBody <- getResponseBody rawResp
    return (decoded (BC.pack respBody))

  | otherwise = return (Left "Tracker does not support scraping")

-- | A more specific version of 'scrape' for a single torrent.
--
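--   For example (sketch; @announceURI@ and @infoHash@ are assumed to be
--   in scope):
--
--   > scrapeOne announceURI infoHash >>= print
--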
scrapeOne :: URI                     -- ^ Announce 'URI'
          -> InfoHash                -- ^ Hash of the torrent info.
          -> IO (Result ScrapeInfo)  -- ^ 'ScrapeInfo' for the torrent.
scrapeOne uri ih = extract <$> scrape uri [ih]
  where
    extract (Right m)
      | Just s <- M.lookup ih m = Right s
      | otherwise = Left "unable to find info hash in response dict"
    extract (Left e) = Left e