diff options
Diffstat (limited to 'src/Data/Torrent.hs')
-rw-r--r-- | src/Data/Torrent.hs | 511 |
1 files changed, 0 insertions, 511 deletions
diff --git a/src/Data/Torrent.hs b/src/Data/Torrent.hs deleted file mode 100644 index 36974ccf..00000000 --- a/src/Data/Torrent.hs +++ /dev/null | |||
@@ -1,511 +0,0 @@ | |||
1 | -- | | ||
2 | -- Copyright : (c) Sam T. 2013 | ||
3 | -- License : MIT | ||
4 | -- Maintainer : pxqr.sta@gmail.com | ||
5 | -- Stability : experimental | ||
6 | -- Portability : portable | ||
7 | -- | ||
8 | -- Torrent file contains metadata about files and folders but not | ||
9 | -- content itself. The files are bencoded dictionaries. There is | ||
10 | -- also other info which is used to help join the swarm. | ||
11 | -- | ||
12 | -- This module provides torrent metainfo serialization and info hash | ||
13 | -- extraction. | ||
14 | -- | ||
15 | -- For more info see: | ||
16 | -- <http://www.bittorrent.org/beps/bep_0003.html#metainfo-files>, | ||
17 | -- <https://wiki.theory.org/BitTorrentSpecification#Metainfo_File_Structure> | ||
18 | -- | ||
19 | {-# LANGUAGE CPP #-} | ||
20 | {-# LANGUAGE FlexibleInstances #-} | ||
21 | {-# LANGUAGE BangPatterns #-} | ||
22 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} | ||
23 | {-# LANGUAGE TemplateHaskell #-} | ||
24 | {-# OPTIONS -fno-warn-orphans #-} | ||
25 | -- TODO refine interface | ||
26 | module Data.Torrent | ||
27 | ( -- * Torrent | ||
28 | Torrent(..), ContentInfo(..), FileInfo(..) | ||
29 | , mktorrent, simpleTorrent | ||
30 | , torrentExt, isTorrentPath | ||
31 | , fromFile | ||
32 | |||
33 | -- * Files layout | ||
34 | , Layout, contentLayout | ||
35 | , contentLength, fileCount, fileOffset | ||
36 | , pieceCount, blockCount | ||
37 | , isSingleFile, isMultiFile | ||
38 | |||
39 | , checkPiece | ||
40 | |||
41 | -- * Info hash | ||
42 | #if defined (TESTING) | ||
43 | , InfoHash(..) | ||
44 | #else | ||
45 | , InfoHash(..) | ||
46 | #endif | ||
47 | , ppInfoHash | ||
48 | , addHashToURI | ||
49 | |||
50 | -- * Extra | ||
51 | , sizeInBase | ||
52 | |||
53 | -- #if defined (TESTING) | ||
54 | -- * Internal | ||
55 | , Data.Torrent.hash | ||
56 | , Data.Torrent.hashlazy | ||
57 | , layoutOffsets | ||
58 | , slice | ||
59 | -- #endif | ||
60 | ) where | ||
61 | |||
62 | import Prelude hiding (sum) | ||
63 | |||
64 | import Control.Applicative | ||
65 | import Control.Arrow | ||
66 | import Control.Exception | ||
67 | import Control.Monad | ||
68 | |||
69 | import qualified Crypto.Hash.SHA1 as C | ||
70 | |||
71 | import Data.Aeson | ||
72 | import Data.Aeson.TH | ||
73 | import Data.BEncode as BE | ||
74 | import Data.Char | ||
75 | import Data.Foldable | ||
76 | import qualified Data.ByteString as B | ||
77 | import Data.ByteString.Internal | ||
78 | import qualified Data.ByteString.Char8 as BC (pack, unpack) | ||
79 | import qualified Data.ByteString.Lazy as Lazy | ||
80 | import qualified Data.ByteString.Lazy.Builder as B | ||
81 | import qualified Data.ByteString.Lazy.Builder.ASCII as B | ||
82 | import qualified Data.List as L | ||
83 | import Data.Map (Map) | ||
84 | import qualified Data.Map as M | ||
85 | import Data.Hashable as Hashable | ||
86 | import Data.Text (Text) | ||
87 | import Data.Serialize as S hiding (Result) | ||
88 | import Text.PrettyPrint | ||
89 | import Text.ParserCombinators.ReadP as P | ||
90 | |||
91 | import Network.URI | ||
92 | import System.FilePath | ||
93 | import Numeric | ||
94 | |||
95 | {----------------------------------------------------------------------- | ||
96 | Info hash | ||
97 | -----------------------------------------------------------------------} | ||
98 | |||
99 | -- | Exactly 20 bytes long SHA1 hash of the info part of torrent file. | ||
100 | newtype InfoHash = InfoHash { getInfoHash :: ByteString } | ||
101 | deriving (Eq, Ord, ToJSON, FromJSON) | ||
102 | |||
103 | instance Show InfoHash where | ||
104 | show = render . ppInfoHash | ||
105 | |||
106 | instance Read InfoHash where | ||
107 | readsPrec _ = readP_to_S $ do | ||
108 | str <- replicateM 40 (satisfy isHexDigit) | ||
109 | return $ InfoHash $ decodeIH str | ||
110 | where | ||
111 | decodeIH = B.pack . map fromHex . pair | ||
112 | fromHex (a, b) = read $ '0' : 'x' : a : b : [] | ||
113 | |||
114 | pair (a : b : xs) = (a, b) : pair xs | ||
115 | pair _ = [] | ||
116 | |||
117 | instance Hashable InfoHash where | ||
118 | hash = Hashable.hash . getInfoHash | ||
119 | |||
120 | instance BEncodable InfoHash where | ||
121 | toBEncode = toBEncode . getInfoHash | ||
122 | fromBEncode be = InfoHash <$> fromBEncode be | ||
123 | |||
124 | instance Serialize InfoHash where | ||
125 | put = putByteString . getInfoHash | ||
126 | get = InfoHash <$> getBytes 20 | ||
127 | |||
128 | instance BEncodable a => BEncodable (Map InfoHash a) where | ||
129 | {-# SPECIALIZE instance BEncodable a => BEncodable (Map InfoHash a) #-} | ||
130 | fromBEncode b = M.mapKeys InfoHash <$> fromBEncode b | ||
131 | {-# INLINE fromBEncode #-} | ||
132 | |||
133 | toBEncode = toBEncode . M.mapKeys getInfoHash | ||
134 | {-# INLINE toBEncode #-} | ||
135 | |||
136 | -- | Hash strict bytestring using SHA1 algorithm. | ||
137 | hash :: ByteString -> InfoHash | ||
138 | hash = InfoHash . C.hash | ||
139 | |||
140 | -- | Hash lazy bytestring using SHA1 algorithm. | ||
141 | hashlazy :: Lazy.ByteString -> InfoHash | ||
142 | hashlazy = InfoHash . C.hashlazy | ||
143 | |||
144 | -- | Pretty print info hash in hexadecimal format. | ||
145 | ppInfoHash :: InfoHash -> Doc | ||
146 | ppInfoHash = text . BC.unpack . ppHex . getInfoHash | ||
147 | |||
148 | ppHex :: ByteString -> ByteString | ||
149 | ppHex = Lazy.toStrict . B.toLazyByteString . B.byteStringHexFixed | ||
150 | |||
151 | -- | Add query info hash parameter to uri. | ||
152 | -- | ||
153 | -- > info_hash=<url_encoded_info_hash> | ||
154 | -- | ||
155 | addHashToURI :: URI -> InfoHash -> URI | ||
156 | addHashToURI uri s = uri { | ||
157 | uriQuery = uriQuery uri ++ mkPref (uriQuery uri) ++ | ||
158 | "info_hash=" ++ rfc1738Encode (BC.unpack (getInfoHash s)) | ||
159 | } | ||
160 | where | ||
161 | mkPref [] = "?" | ||
162 | mkPref ('?' : _) = "&" | ||
163 | mkPref _ = error "addHashToURI" | ||
164 | |||
165 | rfc1738Encode = L.concatMap (\c -> if unreservedS c then [c] else encodeHex c) | ||
166 | where | ||
167 | unreservedS = (`L.elem` chars) | ||
168 | chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_./" | ||
169 | encodeHex c = '%' : pHex c | ||
170 | pHex c = let p = (showHex . ord $ c) "" | ||
171 | in if L.length p == 1 then '0' : p else p | ||
172 | |||
173 | {----------------------------------------------------------------------- | ||
174 | Torrent metainfo | ||
175 | -----------------------------------------------------------------------} | ||
176 | |||
177 | type Time = Text | ||
178 | |||
179 | -- | Contain info about one single file. | ||
180 | data FileInfo = FileInfo { | ||
181 | fiLength :: !Integer | ||
182 | -- ^ Length of the file in bytes. | ||
183 | |||
184 | , fiMD5sum :: Maybe ByteString | ||
185 | -- ^ 32 character long MD5 sum of the file. | ||
186 | -- Used by third-party tools, not by bittorrent protocol itself. | ||
187 | |||
188 | , fiPath :: ![ByteString] | ||
189 | -- ^ One or more string elements that together represent the | ||
190 | -- path and filename. Each element in the list corresponds to | ||
191 | -- either a directory name or (in the case of the last | ||
192 | -- element) the filename. For example, the file: | ||
193 | -- | ||
194 | -- > "dir1/dir2/file.ext" | ||
195 | -- | ||
196 | -- would consist of three string elements: | ||
197 | -- | ||
198 | -- > ["dir1", "dir2", "file.ext"] | ||
199 | -- | ||
200 | } deriving (Show, Read, Eq) | ||
201 | |||
202 | $(deriveJSON (L.map toLower . L.dropWhile isLower) ''FileInfo) | ||
203 | |||
204 | |||
205 | -- | Info part of the .torrent file contain info about each content file. | ||
206 | data ContentInfo = | ||
207 | SingleFile { | ||
208 | ciLength :: !Integer | ||
209 | -- ^ Length of the file in bytes. | ||
210 | |||
211 | , ciMD5sum :: Maybe ByteString | ||
212 | -- ^ 32 character long MD5 sum of the file. | ||
213 | -- Used by third-party tools, not by bittorrent protocol itself. | ||
214 | |||
215 | , ciName :: !ByteString | ||
216 | -- ^ Suggested name of the file single file. | ||
217 | |||
218 | |||
219 | |||
220 | , ciPieceLength :: !Int | ||
221 | -- ^ Number of bytes in each piece. | ||
222 | |||
223 | , ciPieces :: !ByteString | ||
224 | -- ^ Concatenation of all 20-byte SHA1 hash values. | ||
225 | |||
226 | , ciPrivate :: Maybe Bool | ||
227 | -- ^ If set the client MUST publish its presence to get other | ||
228 | -- peers ONLY via the trackers explicity described in the | ||
229 | -- metainfo file. | ||
230 | -- | ||
231 | -- BEP 27: <http://www.bittorrent.org/beps/bep_0027.html> | ||
232 | } | ||
233 | |||
234 | | MultiFile { | ||
235 | ciFiles :: ![FileInfo] | ||
236 | -- ^ List of the all files that torrent contains. | ||
237 | |||
238 | , ciName :: !ByteString | ||
239 | -- | The file path of the directory in which to store all the files. | ||
240 | |||
241 | , ciPieceLength :: !Int | ||
242 | , ciPieces :: !ByteString | ||
243 | , ciPrivate :: Maybe Bool | ||
244 | } deriving (Show, Read, Eq) | ||
245 | |||
246 | $(deriveJSON id ''ContentInfo) | ||
247 | |||
248 | -- TODO more convenient form of torrent info. | ||
249 | -- | Metainfo about particular torrent. | ||
250 | data Torrent = Torrent { | ||
251 | tInfoHash :: !InfoHash | ||
252 | -- ^ SHA1 hash of the 'TorrentInfo' of the 'Torrent'. | ||
253 | |||
254 | , tAnnounce :: !URI | ||
255 | -- ^ The URL of the tracker. | ||
256 | |||
257 | -- NOTE: out of lexicographic order! | ||
258 | , tInfo :: !ContentInfo | ||
259 | -- ^ Info about each content file. | ||
260 | |||
261 | , tAnnounceList :: Maybe [[URI]] | ||
262 | -- ^ Announce list add multiple tracker support. | ||
263 | -- | ||
264 | -- BEP 12: <http://www.bittorrent.org/beps/bep_0012.html> | ||
265 | |||
266 | , tComment :: Maybe Text | ||
267 | -- ^ Free-form comments of the author. | ||
268 | |||
269 | , tCreatedBy :: Maybe ByteString | ||
270 | -- ^ Name and version of the program used to create the .torrent. | ||
271 | |||
272 | , tCreationDate :: Maybe Time | ||
273 | -- ^ Creation time of the torrent, in standard UNIX epoch. | ||
274 | |||
275 | , tEncoding :: Maybe ByteString | ||
276 | -- ^ String encoding format used to generate the pieces part of | ||
277 | -- the info dictionary in the .torrent metafile. | ||
278 | |||
279 | , tPublisher :: Maybe URI | ||
280 | -- ^ Containing the RSA public key of the publisher of the | ||
281 | -- torrent. Private counterpart of this key that has the | ||
282 | -- authority to allow new peers onto the swarm. | ||
283 | |||
284 | , tPublisherURL :: Maybe URI | ||
285 | , tSignature :: Maybe ByteString | ||
286 | -- ^ The RSA signature of the info dictionary (specifically, | ||
287 | -- the encrypted SHA-1 hash of the info dictionary). | ||
288 | } deriving (Show, Eq) | ||
289 | |||
290 | instance Hashable Torrent where | ||
291 | hash = Hashable.hash . tInfoHash | ||
292 | |||
293 | {- note that info hash is actually reduntant field | ||
294 | but it's better to keep it here to avoid heavy recomputations | ||
295 | -} | ||
296 | |||
297 | -- | Smart constructor for 'Torrent' which compute info hash. | ||
298 | mktorrent :: URI -> ContentInfo | ||
299 | -> Maybe [[URI]] -> Maybe Text -> Maybe ByteString | ||
300 | -> Maybe Time -> Maybe ByteString -> Maybe URI | ||
301 | -> Maybe URI -> Maybe ByteString | ||
302 | -> Torrent | ||
303 | mktorrent announce info = Torrent (hashlazy (BE.encoded info)) announce info | ||
304 | |||
305 | -- | A simple torrent contains only required fields. | ||
306 | simpleTorrent :: URI -> ContentInfo -> Torrent | ||
307 | simpleTorrent announce info = mktorrent announce info | ||
308 | Nothing Nothing Nothing | ||
309 | Nothing Nothing Nothing | ||
310 | Nothing Nothing | ||
311 | |||
312 | -- TODO check if pieceLength is power of 2 | ||
313 | |||
314 | instance BEncodable URI where | ||
315 | toBEncode uri = toBEncode (BC.pack (uriToString id uri "")) | ||
316 | {-# INLINE toBEncode #-} | ||
317 | |||
318 | fromBEncode (BString s) | Just url <- parseURI (BC.unpack s) = return url | ||
319 | fromBEncode b = decodingError $ "url <" ++ show b ++ ">" | ||
320 | {-# INLINE fromBEncode #-} | ||
321 | |||
322 | instance BEncodable Torrent where | ||
323 | toBEncode Torrent {..} = fromAscAssocs | ||
324 | [ "announce" --> tAnnounce | ||
325 | , "announce-list" -->? tAnnounceList | ||
326 | , "comment" -->? tComment | ||
327 | , "created by" -->? tCreatedBy | ||
328 | , "creation date" -->? tCreationDate | ||
329 | , "encoding" -->? tEncoding | ||
330 | , "info" --> tInfo | ||
331 | , "publisher" -->? tPublisher | ||
332 | , "publisher-url" -->? tPublisherURL | ||
333 | , "signature" -->? tSignature | ||
334 | ] | ||
335 | |||
336 | fromBEncode (BDict d) | Just info <- M.lookup "info" d = | ||
337 | Torrent <$> pure (hashlazy (BE.encode info)) -- WARN | ||
338 | <*> d >-- "announce" | ||
339 | <*> d >-- "info" | ||
340 | <*> d >--? "announce-list" | ||
341 | <*> d >--? "comment" | ||
342 | <*> d >--? "created by" | ||
343 | <*> d >--? "creation date" | ||
344 | <*> d >--? "encoding" | ||
345 | <*> d >--? "publisher" | ||
346 | <*> d >--? "publisher-url" | ||
347 | <*> d >--? "signature" | ||
348 | |||
349 | fromBEncode _ = decodingError "Torrent" | ||
350 | |||
351 | |||
352 | instance BEncodable ContentInfo where | ||
353 | toBEncode SingleFile {..} = fromAscAssocs | ||
354 | [ "length" --> ciLength | ||
355 | , "md5sum" -->? ciMD5sum | ||
356 | , "name" --> ciName | ||
357 | |||
358 | , "piece length" --> ciPieceLength | ||
359 | , "pieces" --> ciPieces | ||
360 | , "private" -->? ciPrivate | ||
361 | ] | ||
362 | |||
363 | toBEncode MultiFile {..} = fromAscAssocs | ||
364 | [ "files" --> ciFiles | ||
365 | , "name" --> ciName | ||
366 | |||
367 | , "piece length" --> ciPieceLength | ||
368 | , "pieces" --> ciPieces | ||
369 | , "private" -->? ciPrivate | ||
370 | ] | ||
371 | |||
372 | fromBEncode (BDict d) | ||
373 | | Just (BList fs) <- M.lookup "files" d = | ||
374 | MultiFile <$> mapM fromBEncode fs | ||
375 | <*> d >-- "name" | ||
376 | <*> d >-- "piece length" | ||
377 | <*> d >-- "pieces" | ||
378 | <*> d >--? "private" | ||
379 | | otherwise = | ||
380 | SingleFile <$> d >-- "length" | ||
381 | <*> d >--? "md5sum" | ||
382 | <*> d >-- "name" | ||
383 | <*> d >-- "piece length" | ||
384 | <*> d >-- "pieces" | ||
385 | <*> d >--? "private" | ||
386 | fromBEncode _ = decodingError "ContentInfo" | ||
387 | |||
388 | |||
389 | instance BEncodable FileInfo where | ||
390 | toBEncode FileInfo {..} = fromAssocs | ||
391 | [ "length" --> fiLength | ||
392 | , "md5sum" -->? fiMD5sum | ||
393 | , "path" --> fiPath | ||
394 | ] | ||
395 | |||
396 | fromBEncode (BDict d) = | ||
397 | FileInfo <$> d >-- "length" | ||
398 | <*> d >--? "md5sum" | ||
399 | <*> d >-- "path" | ||
400 | |||
401 | fromBEncode _ = decodingError "FileInfo" | ||
402 | |||
403 | |||
404 | -- | Divide and round up. | ||
405 | sizeInBase :: Integral a => a -> Int -> Int | ||
406 | sizeInBase n b = fromIntegral (n `div` fromIntegral b) + align | ||
407 | where | ||
408 | align = if n `mod` fromIntegral b == 0 then 0 else 1 | ||
409 | {-# SPECIALIZE sizeInBase :: Int -> Int -> Int #-} | ||
410 | {-# SPECIALIZE sizeInBase :: Integer -> Int -> Int #-} | ||
411 | |||
412 | |||
413 | -- | Find sum of sizes of the all torrent files. | ||
414 | contentLength :: ContentInfo -> Integer | ||
415 | contentLength SingleFile { ciLength = len } = len | ||
416 | contentLength MultiFile { ciFiles = tfs } = sum (map fiLength tfs) | ||
417 | |||
418 | -- | Get count of all files in torrent. | ||
419 | fileCount :: ContentInfo -> Int | ||
420 | fileCount SingleFile {..} = 1 | ||
421 | fileCount MultiFile {..} = length ciFiles | ||
422 | |||
423 | -- | Find count of pieces in the torrent. If torrent size is not a | ||
424 | -- multiple of piece size then the count is rounded up. | ||
425 | pieceCount :: ContentInfo -> Int | ||
426 | pieceCount ci = contentLength ci `sizeInBase` ciPieceLength ci | ||
427 | |||
428 | -- | Find number of blocks of the specified size. If torrent size is | ||
429 | -- not a multiple of block size then the count is rounded up. | ||
430 | blockCount :: Int -- ^ Block size. | ||
431 | -> ContentInfo -- ^ Torrent content info. | ||
432 | -> Int -- ^ Number of blocks. | ||
433 | blockCount blkSize ci = contentLength ci `sizeInBase` blkSize | ||
434 | |||
435 | -- | File layout specifies the order and the size of each file in the | ||
436 | -- storage. Note that order of files is highly important since we | ||
437 | -- coalesce all the files in the given order to get the linear block | ||
438 | -- address space. | ||
439 | -- | ||
440 | type Layout = [(FilePath, Int)] | ||
441 | |||
442 | -- | Extract files layout from torrent info with the given root path. | ||
443 | contentLayout :: FilePath -- ^ Root path for the all torrent files. | ||
444 | -> ContentInfo -- ^ Torrent content information. | ||
445 | -> Layout -- ^ The all file paths prefixed with the | ||
446 | -- given root. | ||
447 | contentLayout rootPath = filesLayout | ||
448 | where | ||
449 | filesLayout (SingleFile { ciName = name, ciLength = len }) | ||
450 | = [(rootPath </> BC.unpack name, fromIntegral len)] | ||
451 | filesLayout (MultiFile { ciFiles = fs, ciName = dir }) = | ||
452 | map (first mkPath . fl) fs | ||
453 | where -- TODO use utf8 encoding in name | ||
454 | mkPath = ((rootPath </> BC.unpack dir) </>) . joinPath . map BC.unpack | ||
455 | |||
456 | fl (FileInfo { fiPath = p, fiLength = len }) = (p, fromIntegral len) | ||
457 | |||
458 | layoutOffsets :: Layout -> Layout | ||
459 | layoutOffsets = go 0 | ||
460 | where | ||
461 | go !_ [] = [] | ||
462 | go !offset ((n, s) : xs) = (n, offset) : go (offset + s) xs | ||
463 | |||
464 | -- | Gives global offset of a content file for a given full path. | ||
465 | fileOffset :: FilePath -> ContentInfo -> Maybe Integer | ||
466 | fileOffset fullPath | ||
467 | = fmap fromIntegral . lookup fullPath . layoutOffsets . contentLayout "" | ||
468 | |||
469 | -- | Test if this is single file torrent. | ||
470 | isSingleFile :: ContentInfo -> Bool | ||
471 | isSingleFile SingleFile {} = True | ||
472 | isSingleFile _ = False | ||
473 | {-# INLINE isSingleFile #-} | ||
474 | |||
475 | -- | Test if this is multifile torrent. | ||
476 | isMultiFile :: ContentInfo -> Bool | ||
477 | isMultiFile MultiFile {} = True | ||
478 | isMultiFile _ = False | ||
479 | {-# INLINE isMultiFile #-} | ||
480 | |||
481 | slice :: Int -> Int -> ByteString -> ByteString | ||
482 | slice from siz = B.take siz . B.drop from | ||
483 | {-# INLINE slice #-} | ||
484 | |||
485 | -- | Extract validation hash by specified piece index. | ||
486 | pieceHash :: ContentInfo -> Int -> ByteString | ||
487 | pieceHash ci ix = slice (hashsize * ix) hashsize (ciPieces ci) | ||
488 | where | ||
489 | hashsize = 20 | ||
490 | |||
491 | -- | Validate piece with metainfo hash. | ||
492 | checkPiece :: ContentInfo -> Int -> ByteString -> Bool | ||
493 | checkPiece ci ix piece | ||
494 | = B.length piece == ciPieceLength ci | ||
495 | && C.hash piece == pieceHash ci ix | ||
496 | |||
497 | -- | Extension usually used for torrent metafiles. | ||
498 | torrentExt :: String | ||
499 | torrentExt = "torrent" | ||
500 | |||
501 | -- | Test if this path has proper extension. | ||
502 | isTorrentPath :: FilePath -> Bool | ||
503 | isTorrentPath filepath = takeExtension filepath == extSeparator : torrentExt | ||
504 | |||
505 | -- | Read and decode a .torrent file. | ||
506 | fromFile :: FilePath -> IO Torrent | ||
507 | fromFile filepath = do | ||
508 | contents <- B.readFile filepath | ||
509 | case decoded contents of | ||
510 | Right !t -> return t | ||
511 | Left msg -> throwIO $ userError $ msg ++ " while reading torrent" \ No newline at end of file | ||