summaryrefslogtreecommitdiff
path: root/src/Data/Torrent.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Data/Torrent.hs')
-rw-r--r--src/Data/Torrent.hs511
1 files changed, 0 insertions, 511 deletions
diff --git a/src/Data/Torrent.hs b/src/Data/Torrent.hs
deleted file mode 100644
index 36974ccf..00000000
--- a/src/Data/Torrent.hs
+++ /dev/null
@@ -1,511 +0,0 @@
1-- |
2-- Copyright : (c) Sam T. 2013
3-- License : MIT
4-- Maintainer : pxqr.sta@gmail.com
5-- Stability : experimental
6-- Portability : portable
7--
8-- Torrent file contains metadata about files and folders but not
9-- content itself. The files are bencoded dictionaries. There is
10-- also other info which is used to help join the swarm.
11--
12-- This module provides torrent metainfo serialization and info hash
13-- extraction.
14--
15-- For more info see:
16-- <http://www.bittorrent.org/beps/bep_0003.html#metainfo-files>,
17-- <https://wiki.theory.org/BitTorrentSpecification#Metainfo_File_Structure>
18--
19{-# LANGUAGE CPP #-}
20{-# LANGUAGE FlexibleInstances #-}
21{-# LANGUAGE BangPatterns #-}
22{-# LANGUAGE GeneralizedNewtypeDeriving #-}
23{-# LANGUAGE TemplateHaskell #-}
24{-# OPTIONS -fno-warn-orphans #-}
25-- TODO refine interface
26module Data.Torrent
27 ( -- * Torrent
28 Torrent(..), ContentInfo(..), FileInfo(..)
29 , mktorrent, simpleTorrent
30 , torrentExt, isTorrentPath
31 , fromFile
32
33 -- * Files layout
34 , Layout, contentLayout
35 , contentLength, fileCount, fileOffset
36 , pieceCount, blockCount
37 , isSingleFile, isMultiFile
38
39 , checkPiece
40
41 -- * Info hash
42#if defined (TESTING)
43 , InfoHash(..)
44#else
45 , InfoHash(..)
46#endif
47 , ppInfoHash
48 , addHashToURI
49
50 -- * Extra
51 , sizeInBase
52
53-- #if defined (TESTING)
54 -- * Internal
55 , Data.Torrent.hash
56 , Data.Torrent.hashlazy
57 , layoutOffsets
58 , slice
59-- #endif
60 ) where
61
62import Prelude hiding (sum)
63
64import Control.Applicative
65import Control.Arrow
66import Control.Exception
67import Control.Monad
68
69import qualified Crypto.Hash.SHA1 as C
70
71import Data.Aeson
72import Data.Aeson.TH
73import Data.BEncode as BE
74import Data.Char
75import Data.Foldable
76import qualified Data.ByteString as B
77import Data.ByteString.Internal
78import qualified Data.ByteString.Char8 as BC (pack, unpack)
79import qualified Data.ByteString.Lazy as Lazy
80import qualified Data.ByteString.Lazy.Builder as B
81import qualified Data.ByteString.Lazy.Builder.ASCII as B
82import qualified Data.List as L
83import Data.Map (Map)
84import qualified Data.Map as M
85import Data.Hashable as Hashable
86import Data.Text (Text)
87import Data.Serialize as S hiding (Result)
88import Text.PrettyPrint
89import Text.ParserCombinators.ReadP as P
90
91import Network.URI
92import System.FilePath
93import Numeric
94
95{-----------------------------------------------------------------------
96 Info hash
97-----------------------------------------------------------------------}
98
99-- | Exactly 20 bytes long SHA1 hash of the info part of torrent file.
100newtype InfoHash = InfoHash { getInfoHash :: ByteString }
101 deriving (Eq, Ord, ToJSON, FromJSON)
102
103instance Show InfoHash where
104 show = render . ppInfoHash
105
106instance Read InfoHash where
107 readsPrec _ = readP_to_S $ do
108 str <- replicateM 40 (satisfy isHexDigit)
109 return $ InfoHash $ decodeIH str
110 where
111 decodeIH = B.pack . map fromHex . pair
112 fromHex (a, b) = read $ '0' : 'x' : a : b : []
113
114 pair (a : b : xs) = (a, b) : pair xs
115 pair _ = []
116
117instance Hashable InfoHash where
118 hash = Hashable.hash . getInfoHash
119
120instance BEncodable InfoHash where
121 toBEncode = toBEncode . getInfoHash
122 fromBEncode be = InfoHash <$> fromBEncode be
123
124instance Serialize InfoHash where
125 put = putByteString . getInfoHash
126 get = InfoHash <$> getBytes 20
127
128instance BEncodable a => BEncodable (Map InfoHash a) where
129 {-# SPECIALIZE instance BEncodable a => BEncodable (Map InfoHash a) #-}
130 fromBEncode b = M.mapKeys InfoHash <$> fromBEncode b
131 {-# INLINE fromBEncode #-}
132
133 toBEncode = toBEncode . M.mapKeys getInfoHash
134 {-# INLINE toBEncode #-}
135
136-- | Hash strict bytestring using SHA1 algorithm.
137hash :: ByteString -> InfoHash
138hash = InfoHash . C.hash
139
140-- | Hash lazy bytestring using SHA1 algorithm.
141hashlazy :: Lazy.ByteString -> InfoHash
142hashlazy = InfoHash . C.hashlazy
143
144-- | Pretty print info hash in hexadecimal format.
145ppInfoHash :: InfoHash -> Doc
146ppInfoHash = text . BC.unpack . ppHex . getInfoHash
147
148ppHex :: ByteString -> ByteString
149ppHex = Lazy.toStrict . B.toLazyByteString . B.byteStringHexFixed
150
151-- | Add query info hash parameter to uri.
152--
153-- > info_hash=<url_encoded_info_hash>
154--
155addHashToURI :: URI -> InfoHash -> URI
156addHashToURI uri s = uri {
157 uriQuery = uriQuery uri ++ mkPref (uriQuery uri) ++
158 "info_hash=" ++ rfc1738Encode (BC.unpack (getInfoHash s))
159 }
160 where
161 mkPref [] = "?"
162 mkPref ('?' : _) = "&"
163 mkPref _ = error "addHashToURI"
164
165 rfc1738Encode = L.concatMap (\c -> if unreservedS c then [c] else encodeHex c)
166 where
167 unreservedS = (`L.elem` chars)
168 chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_./"
169 encodeHex c = '%' : pHex c
170 pHex c = let p = (showHex . ord $ c) ""
171 in if L.length p == 1 then '0' : p else p
172
173{-----------------------------------------------------------------------
174 Torrent metainfo
175-----------------------------------------------------------------------}
176
177type Time = Text
178
179-- | Contain info about one single file.
180data FileInfo = FileInfo {
181 fiLength :: !Integer
182 -- ^ Length of the file in bytes.
183
184 , fiMD5sum :: Maybe ByteString
185 -- ^ 32 character long MD5 sum of the file.
186 -- Used by third-party tools, not by bittorrent protocol itself.
187
188 , fiPath :: ![ByteString]
189 -- ^ One or more string elements that together represent the
190 -- path and filename. Each element in the list corresponds to
191 -- either a directory name or (in the case of the last
192 -- element) the filename. For example, the file:
193 --
194 -- > "dir1/dir2/file.ext"
195 --
196 -- would consist of three string elements:
197 --
198 -- > ["dir1", "dir2", "file.ext"]
199 --
200 } deriving (Show, Read, Eq)
201
202$(deriveJSON (L.map toLower . L.dropWhile isLower) ''FileInfo)
203
204
205-- | Info part of the .torrent file contain info about each content file.
206data ContentInfo =
207 SingleFile {
208 ciLength :: !Integer
209 -- ^ Length of the file in bytes.
210
211 , ciMD5sum :: Maybe ByteString
212 -- ^ 32 character long MD5 sum of the file.
213 -- Used by third-party tools, not by bittorrent protocol itself.
214
215 , ciName :: !ByteString
216 -- ^ Suggested name of the file single file.
217
218
219
220 , ciPieceLength :: !Int
221 -- ^ Number of bytes in each piece.
222
223 , ciPieces :: !ByteString
224 -- ^ Concatenation of all 20-byte SHA1 hash values.
225
226 , ciPrivate :: Maybe Bool
227 -- ^ If set the client MUST publish its presence to get other
228 -- peers ONLY via the trackers explicity described in the
229 -- metainfo file.
230 --
231 -- BEP 27: <http://www.bittorrent.org/beps/bep_0027.html>
232 }
233
234 | MultiFile {
235 ciFiles :: ![FileInfo]
236 -- ^ List of the all files that torrent contains.
237
238 , ciName :: !ByteString
239 -- | The file path of the directory in which to store all the files.
240
241 , ciPieceLength :: !Int
242 , ciPieces :: !ByteString
243 , ciPrivate :: Maybe Bool
244 } deriving (Show, Read, Eq)
245
246$(deriveJSON id ''ContentInfo)
247
248-- TODO more convenient form of torrent info.
249-- | Metainfo about particular torrent.
250data Torrent = Torrent {
251 tInfoHash :: !InfoHash
252 -- ^ SHA1 hash of the 'TorrentInfo' of the 'Torrent'.
253
254 , tAnnounce :: !URI
255 -- ^ The URL of the tracker.
256
257 -- NOTE: out of lexicographic order!
258 , tInfo :: !ContentInfo
259 -- ^ Info about each content file.
260
261 , tAnnounceList :: Maybe [[URI]]
262 -- ^ Announce list add multiple tracker support.
263 --
264 -- BEP 12: <http://www.bittorrent.org/beps/bep_0012.html>
265
266 , tComment :: Maybe Text
267 -- ^ Free-form comments of the author.
268
269 , tCreatedBy :: Maybe ByteString
270 -- ^ Name and version of the program used to create the .torrent.
271
272 , tCreationDate :: Maybe Time
273 -- ^ Creation time of the torrent, in standard UNIX epoch.
274
275 , tEncoding :: Maybe ByteString
276 -- ^ String encoding format used to generate the pieces part of
277 -- the info dictionary in the .torrent metafile.
278
279 , tPublisher :: Maybe URI
280 -- ^ Containing the RSA public key of the publisher of the
281 -- torrent. Private counterpart of this key that has the
282 -- authority to allow new peers onto the swarm.
283
284 , tPublisherURL :: Maybe URI
285 , tSignature :: Maybe ByteString
286 -- ^ The RSA signature of the info dictionary (specifically,
287 -- the encrypted SHA-1 hash of the info dictionary).
288 } deriving (Show, Eq)
289
290instance Hashable Torrent where
291 hash = Hashable.hash . tInfoHash
292
293{- note that info hash is actually reduntant field
294 but it's better to keep it here to avoid heavy recomputations
295-}
296
297-- | Smart constructor for 'Torrent' which compute info hash.
298mktorrent :: URI -> ContentInfo
299 -> Maybe [[URI]] -> Maybe Text -> Maybe ByteString
300 -> Maybe Time -> Maybe ByteString -> Maybe URI
301 -> Maybe URI -> Maybe ByteString
302 -> Torrent
303mktorrent announce info = Torrent (hashlazy (BE.encoded info)) announce info
304
305-- | A simple torrent contains only required fields.
306simpleTorrent :: URI -> ContentInfo -> Torrent
307simpleTorrent announce info = mktorrent announce info
308 Nothing Nothing Nothing
309 Nothing Nothing Nothing
310 Nothing Nothing
311
312-- TODO check if pieceLength is power of 2
313
314instance BEncodable URI where
315 toBEncode uri = toBEncode (BC.pack (uriToString id uri ""))
316 {-# INLINE toBEncode #-}
317
318 fromBEncode (BString s) | Just url <- parseURI (BC.unpack s) = return url
319 fromBEncode b = decodingError $ "url <" ++ show b ++ ">"
320 {-# INLINE fromBEncode #-}
321
322instance BEncodable Torrent where
323 toBEncode Torrent {..} = fromAscAssocs
324 [ "announce" --> tAnnounce
325 , "announce-list" -->? tAnnounceList
326 , "comment" -->? tComment
327 , "created by" -->? tCreatedBy
328 , "creation date" -->? tCreationDate
329 , "encoding" -->? tEncoding
330 , "info" --> tInfo
331 , "publisher" -->? tPublisher
332 , "publisher-url" -->? tPublisherURL
333 , "signature" -->? tSignature
334 ]
335
336 fromBEncode (BDict d) | Just info <- M.lookup "info" d =
337 Torrent <$> pure (hashlazy (BE.encode info)) -- WARN
338 <*> d >-- "announce"
339 <*> d >-- "info"
340 <*> d >--? "announce-list"
341 <*> d >--? "comment"
342 <*> d >--? "created by"
343 <*> d >--? "creation date"
344 <*> d >--? "encoding"
345 <*> d >--? "publisher"
346 <*> d >--? "publisher-url"
347 <*> d >--? "signature"
348
349 fromBEncode _ = decodingError "Torrent"
350
351
352instance BEncodable ContentInfo where
353 toBEncode SingleFile {..} = fromAscAssocs
354 [ "length" --> ciLength
355 , "md5sum" -->? ciMD5sum
356 , "name" --> ciName
357
358 , "piece length" --> ciPieceLength
359 , "pieces" --> ciPieces
360 , "private" -->? ciPrivate
361 ]
362
363 toBEncode MultiFile {..} = fromAscAssocs
364 [ "files" --> ciFiles
365 , "name" --> ciName
366
367 , "piece length" --> ciPieceLength
368 , "pieces" --> ciPieces
369 , "private" -->? ciPrivate
370 ]
371
372 fromBEncode (BDict d)
373 | Just (BList fs) <- M.lookup "files" d =
374 MultiFile <$> mapM fromBEncode fs
375 <*> d >-- "name"
376 <*> d >-- "piece length"
377 <*> d >-- "pieces"
378 <*> d >--? "private"
379 | otherwise =
380 SingleFile <$> d >-- "length"
381 <*> d >--? "md5sum"
382 <*> d >-- "name"
383 <*> d >-- "piece length"
384 <*> d >-- "pieces"
385 <*> d >--? "private"
386 fromBEncode _ = decodingError "ContentInfo"
387
388
389instance BEncodable FileInfo where
390 toBEncode FileInfo {..} = fromAssocs
391 [ "length" --> fiLength
392 , "md5sum" -->? fiMD5sum
393 , "path" --> fiPath
394 ]
395
396 fromBEncode (BDict d) =
397 FileInfo <$> d >-- "length"
398 <*> d >--? "md5sum"
399 <*> d >-- "path"
400
401 fromBEncode _ = decodingError "FileInfo"
402
403
404-- | Divide and round up.
405sizeInBase :: Integral a => a -> Int -> Int
406sizeInBase n b = fromIntegral (n `div` fromIntegral b) + align
407 where
408 align = if n `mod` fromIntegral b == 0 then 0 else 1
409{-# SPECIALIZE sizeInBase :: Int -> Int -> Int #-}
410{-# SPECIALIZE sizeInBase :: Integer -> Int -> Int #-}
411
412
413-- | Find sum of sizes of the all torrent files.
414contentLength :: ContentInfo -> Integer
415contentLength SingleFile { ciLength = len } = len
416contentLength MultiFile { ciFiles = tfs } = sum (map fiLength tfs)
417
418-- | Get count of all files in torrent.
419fileCount :: ContentInfo -> Int
420fileCount SingleFile {..} = 1
421fileCount MultiFile {..} = length ciFiles
422
423-- | Find count of pieces in the torrent. If torrent size is not a
424-- multiple of piece size then the count is rounded up.
425pieceCount :: ContentInfo -> Int
426pieceCount ci = contentLength ci `sizeInBase` ciPieceLength ci
427
428-- | Find number of blocks of the specified size. If torrent size is
429-- not a multiple of block size then the count is rounded up.
430blockCount :: Int -- ^ Block size.
431 -> ContentInfo -- ^ Torrent content info.
432 -> Int -- ^ Number of blocks.
433blockCount blkSize ci = contentLength ci `sizeInBase` blkSize
434
435-- | File layout specifies the order and the size of each file in the
436-- storage. Note that order of files is highly important since we
437-- coalesce all the files in the given order to get the linear block
438-- address space.
439--
440type Layout = [(FilePath, Int)]
441
442-- | Extract files layout from torrent info with the given root path.
443contentLayout :: FilePath -- ^ Root path for the all torrent files.
444 -> ContentInfo -- ^ Torrent content information.
445 -> Layout -- ^ The all file paths prefixed with the
446 -- given root.
447contentLayout rootPath = filesLayout
448 where
449 filesLayout (SingleFile { ciName = name, ciLength = len })
450 = [(rootPath </> BC.unpack name, fromIntegral len)]
451 filesLayout (MultiFile { ciFiles = fs, ciName = dir }) =
452 map (first mkPath . fl) fs
453 where -- TODO use utf8 encoding in name
454 mkPath = ((rootPath </> BC.unpack dir) </>) . joinPath . map BC.unpack
455
456 fl (FileInfo { fiPath = p, fiLength = len }) = (p, fromIntegral len)
457
458layoutOffsets :: Layout -> Layout
459layoutOffsets = go 0
460 where
461 go !_ [] = []
462 go !offset ((n, s) : xs) = (n, offset) : go (offset + s) xs
463
464-- | Gives global offset of a content file for a given full path.
465fileOffset :: FilePath -> ContentInfo -> Maybe Integer
466fileOffset fullPath
467 = fmap fromIntegral . lookup fullPath . layoutOffsets . contentLayout ""
468
469-- | Test if this is single file torrent.
470isSingleFile :: ContentInfo -> Bool
471isSingleFile SingleFile {} = True
472isSingleFile _ = False
473{-# INLINE isSingleFile #-}
474
475-- | Test if this is multifile torrent.
476isMultiFile :: ContentInfo -> Bool
477isMultiFile MultiFile {} = True
478isMultiFile _ = False
479{-# INLINE isMultiFile #-}
480
481slice :: Int -> Int -> ByteString -> ByteString
482slice from siz = B.take siz . B.drop from
483{-# INLINE slice #-}
484
485-- | Extract validation hash by specified piece index.
486pieceHash :: ContentInfo -> Int -> ByteString
487pieceHash ci ix = slice (hashsize * ix) hashsize (ciPieces ci)
488 where
489 hashsize = 20
490
491-- | Validate piece with metainfo hash.
492checkPiece :: ContentInfo -> Int -> ByteString -> Bool
493checkPiece ci ix piece
494 = B.length piece == ciPieceLength ci
495 && C.hash piece == pieceHash ci ix
496
497-- | Extension usually used for torrent metafiles.
498torrentExt :: String
499torrentExt = "torrent"
500
501-- | Test if this path has proper extension.
502isTorrentPath :: FilePath -> Bool
503isTorrentPath filepath = takeExtension filepath == extSeparator : torrentExt
504
505-- | Read and decode a .torrent file.
506fromFile :: FilePath -> IO Torrent
507fromFile filepath = do
508 contents <- B.readFile filepath
509 case decoded contents of
510 Right !t -> return t
511 Left msg -> throwIO $ userError $ msg ++ " while reading torrent" \ No newline at end of file