summaryrefslogtreecommitdiff
path: root/src/Data/Torrent.hs
blob: 5971aa9c0edc3a3ae60dac6169da0e794eb63de7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
-- |
--   Copyright   :  (c) Sam T. 2013
--   License     :  MIT
--   Maintainer  :  pxqr.sta@gmail.com
--   Stability   :  experimental
--   Portability :  portable
--
--   Torrent file contains metadata about files and folders but not
--   content itself. The files are bencoded dictionaries. There is
--   also other info which is used to help join the swarm.
--
--   This module provides torrent metainfo serialization and info hash
--   extraction.
--
--   For more info see:
--   <http://www.bittorrent.org/beps/bep_0003.html#metainfo-files>,
--   <https://wiki.theory.org/BitTorrentSpecification#Metainfo_File_Structure>
--
{-# OPTIONS -fno-warn-orphans #-}
{-# LANGUAGE CPP               #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards   #-}
{-# LANGUAGE BangPatterns      #-}
{-# LANGUAGE TemplateHaskell   #-}
-- TODO refine interface
module Data.Torrent
       ( -- * Torrent
         Torrent(..), ContentInfo(..), FileInfo(..)
       , mktorrent, simpleTorrent
       , torrentExt, isTorrentPath
       , fromFile

         -- * Files layout
       , Layout, contentLayout
       , contentLength, fileOffset
       , pieceCount, blockCount
       , isSingleFile, isMultiFile

       , checkPiece

         -- * Info hash
#if defined (TESTING)
       , InfoHash(..)
#else
       , InfoHash(..)
#endif
       , ppInfoHash
       , addHashToURI

         -- * Extra
       , sizeInBase

#if defined (TESTING)
         -- * Internal
       , Data.Torrent.hash
       , Data.Torrent.hashlazy
       , layoutOffsets
#endif
       ) where

import Prelude hiding (sum)

import Control.Applicative
import Control.Arrow
import Control.Exception
import Control.Monad

import qualified Crypto.Hash.SHA1 as C

import Data.Aeson.TH
import Data.BEncode as BE
import Data.Char
import Data.Foldable
import qualified Data.ByteString as B
import           Data.ByteString.Internal
import qualified Data.ByteString.Char8 as BC (pack, unpack)
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.ByteString.Lazy.Builder as B
import qualified Data.ByteString.Lazy.Builder.ASCII as B
import qualified Data.List as L
import           Data.Maybe
import           Data.Map (Map)
import qualified Data.Map as M
import           Data.Hashable as Hashable
import           Data.Text (Text)
import           Data.Serialize as S hiding (Result)
import           Text.PrettyPrint
import           Text.ParserCombinators.ReadP as P
import           Text.Read

import Network.URI
import System.FilePath
import Numeric

{-----------------------------------------------------------------------
    Info hash
-----------------------------------------------------------------------}

-- | Exactly 20 bytes long SHA1 hash of the info part of torrent file.
newtype InfoHash = InfoHash { getInfoHash :: ByteString }
                   deriving (Eq, Ord)

instance Show InfoHash where
  show = render . ppInfoHash

instance Read InfoHash where
  readsPrec _ = readP_to_S $ do
      str <- replicateM 40 (satisfy isHexDigit)
      return $ InfoHash $ decodeIH str
    where
      decodeIH       = B.pack . map fromHex . pair
      fromHex (a, b) = read $ '0' : 'x' : a : b : []

      pair (a : b : xs) = (a, b) : pair xs
      pair _            = []

instance Hashable InfoHash where
  hash = Hashable.hash . getInfoHash

instance BEncodable InfoHash where
  toBEncode = toBEncode . getInfoHash
  fromBEncode be = InfoHash <$> fromBEncode be

instance Serialize InfoHash where
  put = putByteString . getInfoHash
  get = InfoHash <$> getBytes 20

instance BEncodable a => BEncodable (Map InfoHash a) where
  {-# SPECIALIZE instance BEncodable a => BEncodable (Map InfoHash a)  #-}
  fromBEncode b = M.mapKeys InfoHash <$> fromBEncode b
  {-# INLINE fromBEncode #-}

  toBEncode = toBEncode . M.mapKeys getInfoHash
  {-# INLINE toBEncode #-}

-- | Hash strict bytestring using SHA1 algorithm.
hash :: ByteString -> InfoHash
hash = InfoHash . C.hash

-- | Hash lazy bytestring using SHA1 algorithm.
hashlazy :: Lazy.ByteString -> InfoHash
hashlazy = InfoHash . C.hashlazy

-- | Pretty print info hash in hexadecimal format.
ppInfoHash :: InfoHash -> Doc
ppInfoHash = text . BC.unpack . ppHex . getInfoHash

ppHex :: ByteString -> ByteString
ppHex = Lazy.toStrict . B.toLazyByteString . B.byteStringHexFixed

-- | Add query info hash parameter to uri.
--
--   > info_hash=<url_encoded_info_hash>
--
addHashToURI :: URI -> InfoHash -> URI
addHashToURI uri s = uri {
    uriQuery = uriQuery uri ++ mkPref (uriQuery uri) ++
               "info_hash=" ++ rfc1738Encode (BC.unpack (getInfoHash s))
  }
  where
    mkPref [] = "?"
    mkPref ('?' : _) = "&"
    mkPref _ = error "addHashToURI"

    rfc1738Encode = L.concatMap (\c -> if unreservedS c then [c] else encodeHex c)
      where
        unreservedS = (`L.elem` chars)
        chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_./"
        encodeHex c = '%' : pHex c
        pHex c = let p = (showHex . ord $ c) ""
                 in if L.length p == 1 then '0' : p else p

{-----------------------------------------------------------------------
    Torrent metainfo
-----------------------------------------------------------------------}

type Time = Text

-- | Contain info about one single file.
data FileInfo = FileInfo {
      fiLength      :: !Integer
      -- ^ Length of the file in bytes.

    , fiMD5sum      ::  Maybe ByteString
      -- ^ 32 character long MD5 sum of the file.
      --   Used by third-party tools, not by bittorrent protocol itself.

    , fiPath        :: ![ByteString]
      -- ^ One or more string elements that together represent the
      --   path and filename. Each element in the list corresponds to
      --   either a directory name or (in the case of the last
      --   element) the filename.  For example, the file:
      --
      --   > "dir1/dir2/file.ext"
      --
      --   would consist of three string elements:
      --
      --   > ["dir1", "dir2", "file.ext"]
      --
    } deriving (Show, Read, Eq)

$(deriveJSON (L.map toLower . L.dropWhile isLower) ''FileInfo)


-- | Info part of the .torrent file contain info about each content file.
data ContentInfo =
    SingleFile {
      ciLength       :: !Integer
      -- ^ Length of the file in bytes.

    , ciMD5sum       :: Maybe ByteString
      -- ^ 32 character long MD5 sum of the file.
      --   Used by third-party tools, not by bittorrent protocol itself.

    , ciName         :: !ByteString
      -- ^ Suggested name of the file single file.



    , ciPieceLength  :: !Int
      -- ^ Number of bytes in each piece.

    , ciPieces       :: !ByteString
      -- ^ Concatenation of all 20-byte SHA1 hash values.

    , ciPrivate      :: Maybe Bool
      -- ^ If set the client MUST publish its presence to get other
      -- peers ONLY via the trackers explicity described in the
      -- metainfo file.
      --
      --   BEP 27: <http://www.bittorrent.org/beps/bep_0027.html>
    }

  | MultiFile {
      ciFiles        :: ![FileInfo]
      -- ^ List of the all files that torrent contains.

    , ciName         :: !ByteString
      -- | The file path of the directory in which to store all the files.

    , ciPieceLength  :: !Int
    , ciPieces       :: !ByteString
    , ciPrivate      :: Maybe Bool
    } deriving (Show, Read, Eq)

$(deriveJSON id ''ContentInfo)

-- TODO more convenient form of torrent info.
-- | Metainfo about particular torrent.
data Torrent = Torrent {
      tInfoHash     :: !InfoHash
      -- ^ SHA1 hash of the 'TorrentInfo' of the 'Torrent'.

    , tAnnounce     :: !URI
      -- ^ The URL of the tracker.

      -- NOTE: out of lexicographic order!
    , tInfo         :: !ContentInfo
      -- ^ Info about each content file.

    , tAnnounceList :: Maybe [[URI]]
      -- ^ Announce list add multiple tracker support.
      --
      --   BEP 12: <http://www.bittorrent.org/beps/bep_0012.html>

    , tComment      :: Maybe Text
      -- ^ Free-form comments of the author.

    , tCreatedBy    :: Maybe ByteString
      -- ^ Name and version of the program used to create the .torrent.

    , tCreationDate :: Maybe Time
      -- ^ Creation time of the torrent, in standard UNIX epoch.

    , tEncoding     :: Maybe ByteString
      -- ^ String encoding format used to generate the pieces part of
      --   the info dictionary in the .torrent metafile.

    , tPublisher    :: Maybe URI
      -- ^ Containing the RSA public key of the publisher of the
      -- torrent.  Private counterpart of this key that has the
      -- authority to allow new peers onto the swarm.

    , tPublisherURL :: Maybe URI
    , tSignature    :: Maybe ByteString
      -- ^ The RSA signature of the info dictionary (specifically,
      --   the encrypted SHA-1 hash of the info dictionary).
    } deriving (Show, Eq)

instance Hashable Torrent where
  hash = Hashable.hash . tInfoHash

{- note that info hash is actually reduntant field
   but it's better to keep it here to avoid heavy recomputations
-}

-- | Smart constructor for 'Torrent' which compute info hash.
mktorrent :: URI            -> ContentInfo
        -> Maybe [[URI]]  -> Maybe Text       -> Maybe ByteString
        -> Maybe Time     -> Maybe ByteString -> Maybe URI
        -> Maybe URI      -> Maybe ByteString
        -> Torrent
mktorrent announce info = Torrent (hashlazy (BE.encoded info)) announce info

-- | A simple torrent contains only required fields.
simpleTorrent :: URI -> ContentInfo -> Torrent
simpleTorrent announce info = mktorrent announce info
                              Nothing Nothing Nothing
                              Nothing Nothing Nothing
                              Nothing Nothing

-- TODO check if pieceLength is power of 2

instance BEncodable URI where
  toBEncode uri = toBEncode (BC.pack (uriToString id uri ""))
  {-# INLINE toBEncode #-}

  fromBEncode (BString s) | Just url <- parseURI (BC.unpack s) = return url
  fromBEncode b           = decodingError $ "url <" ++ show b ++ ">"
  {-# INLINE fromBEncode #-}

instance BEncodable Torrent where
  toBEncode Torrent {..} = fromAscAssocs
    [ "announce"      -->  tAnnounce
    , "announce-list" -->? tAnnounceList
    , "comment"       -->? tComment
    , "created by"    -->? tCreatedBy
    , "creation date" -->? tCreationDate
    , "encoding"      -->? tEncoding
    , "info"          -->  tInfo
    , "publisher"     -->? tPublisher
    , "publisher-url" -->? tPublisherURL
    , "signature"     -->? tSignature
    ]

  fromBEncode (BDict d) | Just info <- M.lookup "info" d =
    Torrent <$> pure (hashlazy (BE.encode info)) -- WARN
            <*> d >--  "announce"
            <*> d >--  "info"
            <*> d >--? "announce-list"
            <*> d >--? "comment"
            <*> d >--? "created by"
            <*> d >--? "creation date"
            <*> d >--? "encoding"
            <*> d >--? "publisher"
            <*> d >--? "publisher-url"
            <*> d >--? "signature"

  fromBEncode _ = decodingError "Torrent"


instance BEncodable ContentInfo where
  toBEncode SingleFile {..}  = fromAscAssocs
    [ "length"       -->  ciLength
    , "md5sum"       -->? ciMD5sum
    , "name"         -->  ciName

    , "piece length" -->  ciPieceLength
    , "pieces"       -->  ciPieces
    , "private"      -->? ciPrivate
    ]

  toBEncode MultiFile {..} = fromAscAssocs
    [ "files"        -->  ciFiles
    , "name"         -->  ciName

    , "piece length" -->  ciPieceLength
    , "pieces"       -->  ciPieces
    , "private"      -->? ciPrivate
    ]

  fromBEncode (BDict d)
    | Just (BList fs) <- M.lookup "files" d =
      MultiFile   <$> mapM fromBEncode fs
                  <*> d >--  "name"
                  <*> d >--  "piece length"
                  <*> d >--  "pieces"
                  <*> d >--? "private"
    | otherwise =
      SingleFile  <$> d >--  "length"
                  <*> d >--? "md5sum"
                  <*> d >--  "name"
                  <*> d >--  "piece length"
                  <*> d >--  "pieces"
                  <*> d >--? "private"
  fromBEncode _ = decodingError "ContentInfo"


instance BEncodable FileInfo where
  toBEncode FileInfo {..} = fromAssocs
                 [ "length" -->  fiLength
                 , "md5sum" -->? fiMD5sum
                 , "path"   -->  fiPath
                 ]

  fromBEncode (BDict d) =
    FileInfo <$> d >--  "length"
                <*> d >--? "md5sum"
                <*> d >--  "path"

  fromBEncode _ = decodingError "FileInfo"


-- | Divide and round up.
sizeInBase :: Integral a => a -> Int -> Int
sizeInBase n b = fromIntegral (n `div` fromIntegral b) + align
  where
    align = if n `mod` fromIntegral b == 0 then 0 else 1
{-# SPECIALIZE sizeInBase :: Int -> Int -> Int #-}
{-# SPECIALIZE sizeInBase :: Integer -> Int -> Int #-}


-- | Find sum of sizes of the all torrent files.
contentLength :: ContentInfo -> Integer
contentLength SingleFile { ciLength = len } = len
contentLength MultiFile  { ciFiles  = tfs } = sum (map fiLength tfs)

-- | Find count of pieces in the torrent. If torrent size is not a
-- multiple of piece size then the count is rounded up.
pieceCount :: ContentInfo -> Int
pieceCount ci = contentLength ci `sizeInBase` ciPieceLength ci

-- | Find number of blocks of the specified size. If torrent size is
-- not a multiple of block size then the count is rounded up.
blockCount :: Int         -- ^ Block size.
           -> ContentInfo -- ^ Torrent content info.
           -> Int         -- ^ Number of blocks.
blockCount blkSize ci = contentLength ci `sizeInBase` blkSize

-- | File layout specifies the order and the size of each file in the
--   storage. Note that order of files is highly important since we
--   coalesce all the files in the given order to get the linear block
--   address space.
--
type Layout = [(FilePath, Int)]

-- | Extract files layout from torrent info with the given root path.
contentLayout :: FilePath    -- ^ Root path for the all torrent files.
              -> ContentInfo -- ^ Torrent content information.
              -> Layout      -- ^ The all file paths prefixed with the
                             -- given root.
contentLayout rootPath = filesLayout
  where
    filesLayout   (SingleFile { ciName = name, ciLength = len })
      = [(rootPath </> BC.unpack name, fromIntegral len)]
    filesLayout   (MultiFile  { ciFiles = fs, ciName = dir }) =
      map (first mkPath . fl) fs
     where   -- TODO use utf8 encoding in name
        mkPath = ((rootPath </> BC.unpack dir) </>) . joinPath . map BC.unpack

    fl (FileInfo { fiPath = p, fiLength = len }) = (p, fromIntegral len)

layoutOffsets :: Layout -> Layout
layoutOffsets = go 0
  where
    go !_ [] = []
    go !offset ((n, s) : xs) = (n, offset) : go (offset + s) xs

-- | Gives global offset of a content file for a given full path.
fileOffset :: FilePath -> ContentInfo -> Maybe Integer
fileOffset fullPath
  = fmap fromIntegral . lookup fullPath . layoutOffsets . contentLayout ""

-- | Test if this is single file torrent.
isSingleFile :: ContentInfo -> Bool
isSingleFile SingleFile {} = True
isSingleFile _             = False

-- | Test if this is multifile torrent.
isMultiFile :: ContentInfo -> Bool
isMultiFile MultiFile {} = True
isMultiFile _            = False

slice :: Int -> Int -> ByteString -> ByteString
slice from to = B.take to . B.drop from

-- | Extract validation hash by specified piece index.
pieceHash :: ContentInfo -> Int -> ByteString
pieceHash ci ix = slice (hashsize * ix) hashsize (ciPieces ci)
  where
    hashsize   = 20

-- | Validate piece with metainfo hash.
checkPiece :: ContentInfo -> Int -> ByteString -> Bool
checkPiece ci ix piece
  =  B.length piece == ciPieceLength ci
  && C.hash   piece == pieceHash ci ix

-- | Extension usually used for torrent metafiles.
torrentExt :: String
torrentExt = "torrent"

-- | Test if this path has proper extension.
isTorrentPath :: FilePath -> Bool
isTorrentPath filepath = takeExtension filepath == extSeparator : torrentExt

-- | Read and decode a .torrent file.
fromFile :: FilePath -> IO Torrent
fromFile filepath = do
  contents <- B.readFile filepath
  case decoded contents of
    Right !t -> return t
    Left msg -> throwIO $ userError  $ msg ++ " while reading torrent"