1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
|
-- TODO pprint
-- TODO see if this IntervalMap is overkill: Interval dataty have 4 constrs
-- TODO clarify lifetime in docs
-- TODO use madvise
-- TODO unmap selected interval
-- TODO tests
-- TODO benchmarks
-- TODO unmap overlapped regions
-- [A] TODO lazy mapping for 32 bit arch;
-- we need tricky algorithm and a lot of perf tests
-- TODO use memmove in write bytes
-- TODO write elem, write byte, read byte
-- |
-- Copyright : (c) Sam T. 2013
-- License : MIT
-- Maintainer : pxqr.sta@gmail.com
-- Stability : experimental
-- Portability : portable
--
-- This library provides mechanism to mmap files to fixed address
-- with fine-grained control. Hovewer, instead of using MAP_FIXED we
-- create our own address space upon virtual address space. If you
-- would like you could call this space as "fixed address space".
--
-- This solves a few problems:
--
-- * Page already in use. If you mmap one file at 0..x addresses and
-- want to map second file to x..y addresses using MAP_FIXED you
-- can get in troubles: page might be mapped already. Raw call to
-- mmap will silently unmap x..y addresses and then mmap our second
-- file. So here we have extra unmap we would like to avoid.
--
-- * Page boundaries. If you mmap one file at x..x+1 you could
-- not map next file to say addresses x+1..x+2.
--
-- Internally we make ordinary call to mmap to map a file and then
-- using /interval map/ we map fixed address space to virtual
-- address space. It takes TODO time in TODO cases.
--
-- Basically this library could be used when we need coalesce
-- several files in arbitrary way. We could map at any position as
-- long as offset + size fit in 'Int'.
--
-- For other details see:
--
-- > http://hackage.haskell.org/package/mmap
-- > man mmap
--
module System.IO.MMap.Fixed
( -- * Intervals
FixedOffset, FileOffset, FixedInterval, FileInterval
, interval, fileInterval
-- * Construction
, Fixed, Bytes
, System.IO.MMap.Fixed.empty, insertTo
, coalesceFiles
-- ** Specialized 'insertTo'
, mmapTo, mallocTo
-- * Query
, upperAddr
-- * Access
, viewBytes, readBytes, writeBytes
, readElem, writeElem
) where
import Data.ByteString.Lazy as Lazy
import Data.ByteString.Lazy.Internal as Lazy
import Data.ByteString.Internal as B
import Data.List as L
import Data.Int
import Data.IntervalMap.Strict as M
import Data.IntervalMap.Interval
import System.IO.MMap
import Foreign
type FixedOffset = Int
type FileOffset = Int64
type Size = Int
type FileInterval = (FileOffset, Size)
type FixedInterval = Interval FixedOffset
interval :: FixedOffset -> Size -> FixedInterval
interval off s = IntervalCO off (off + fromIntegral (max 0 s))
{-# INLINE interval #-}
fileInterval :: FileOffset -> Size -> FileInterval
fileInterval off s = (off, s)
{-# INLINE fileInterval #-}
intervalSize :: FixedInterval -> Size
intervalSize i = upperBound i - lowerBound i
{-# INLINE intervalSize #-}
type Bytes = (ForeignPtr Word8, Size)
type FixedMap = IntervalMap FixedOffset Bytes
newtype Fixed = Fixed { imap :: FixedMap }
instance Show Fixed where
show = show . M.toList . imap
mapIM :: (FixedMap -> FixedMap) -> Fixed -> Fixed
mapIM f s = s { imap = f (imap s) }
empty :: Fixed
empty = Fixed M.empty
coalesceFiles :: [(FilePath, Int)] -> IO Fixed
coalesceFiles = go 0 System.IO.MMap.Fixed.empty
where
go _ s [] = return s
go offset s ((path, bsize) : xs) = do
s' <- mmapTo path (0, bsize) offset s
go (offset + bsize) s' xs
upperAddr :: Fixed -> FixedOffset
upperAddr = upperBound . fst . findLast . imap
insertTo :: FixedInterval -> Bytes -> Fixed -> Fixed
insertTo fi mm = mapIM (M.insert fi mm)
{-# INLINE insertTo #-}
mmapTo :: FilePath -> FileInterval -> FixedOffset -> Fixed -> IO Fixed
mmapTo path mrange to s = do
(fptr, offset, fsize) <- mmapFileForeignPtr path ReadWriteEx (Just mrange)
let fixed = interval to fsize
let mmaped = (fptr, offset)
return $ insertTo fixed mmaped s
mallocTo :: FixedInterval -> Fixed -> IO Fixed
mallocTo fi s = do
let bsize = intervalSize fi
fptr <- mallocForeignPtrBytes bsize
return (insertTo fi (fptr, 0) s)
-- | Note: this is unsafe operation.
viewBytes :: FixedInterval -> Fixed -> Lazy.ByteString
viewBytes fi s = fromChunks $ L.map mk $ (imap s `intersecting` fi)
where
mk (i, (fptr, offset)) =
let dropB = max 0 (lowerBound fi - lowerBound i)
dropT = max 0 (upperBound i - upperBound fi)
bsize = intervalSize i - (dropT + dropB)
in fromForeignPtr fptr (offset + dropB) bsize
readBytes :: FixedInterval -> Fixed -> IO Lazy.ByteString
readBytes fi s = let c = Lazy.copy (viewBytes fi s) in mkCopy c >> return c
{-# INLINE readBytes #-}
writeBytes :: FixedInterval -> Lazy.ByteString -> Fixed -> IO ()
writeBytes fi bs s = bscpy (viewBytes fi s) bs
{-# INLINE writeBytes #-}
-- | Note: this operation takes O(log(files count)) time, if possible
-- use readBytes.
readElem :: Storable a => Fixed -> FixedOffset -> IO a
readElem s offset = go undefined
where
go :: Storable a => a -> IO a
go dont_touch = do
let bsize = sizeOf dont_touch
let PS fptr off _ = Lazy.toStrict (viewBytes (interval offset bsize) s)
withForeignPtr fptr $ \ ptr -> peekByteOff ptr off
writeElem :: Storable a => Fixed -> FixedOffset -> a -> IO ()
writeElem s offset x = do
let bsize = sizeOf x
let PS fptr off _ = Lazy.toStrict (viewBytes (interval offset bsize) s)
withForeignPtr fptr $ \ptr -> pokeByteOff ptr off x
mkCopy :: Lazy.ByteString -> IO ()
mkCopy Empty = return ()
mkCopy (Chunk _ x) = mkCopy x
bscpy :: Lazy.ByteString -> Lazy.ByteString -> IO ()
bscpy (PS _ _ 0 `Chunk` dest_rest) src = bscpy dest_rest src
bscpy dest (PS _ _ 0 `Chunk` src_rest) = bscpy dest src_rest
bscpy (PS dest_fptr dest_off dest_size `Chunk` dest_rest)
(PS src_fptr src_off src_size `Chunk` src_rest)
= do let csize = min dest_size src_size
withForeignPtr dest_fptr $ \dest_ptr ->
withForeignPtr src_fptr $ \src_ptr ->
memcpy (dest_ptr `advancePtr` dest_off)
(src_ptr `advancePtr` src_off)
(fromIntegral csize) -- TODO memmove?
bscpy (PS dest_fptr (dest_off + csize) (dest_size - csize) `Chunk` dest_rest)
(PS src_fptr (src_off + csize) (src_size - csize) `Chunk` src_rest)
bscpy _ _ = return ()
|