summaryrefslogtreecommitdiff
path: root/src/System/IO/MMap/Fixed.hs
blob: df6a66036f035779d88017e322e6bfc70c66ac5c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
-- TODO pprint
-- TODO see if this IntervalMap is overkill: Interval dataty have 4 constrs
-- TODO clarify lifetime in docs
-- TODO use madvise
-- TODO unmap selected interval
-- TODO tests
-- TODO benchmarks
-- TODO unmap overlapped regions
-- [A] TODO lazy mapping for 32 bit arch;
--          we need tricky algorithm and a lot of perf tests
-- TODO use memmove in write bytes
-- TODO write elem, write byte, read byte
-- |
--   Copyright   :  (c) Sam T. 2013
--   License     :  MIT
--   Maintainer  :  pxqr.sta@gmail.com
--   Stability   :  experimental
--   Portability :  portable
--
--   This library provides mechanism to mmap files to fixed address
--   with fine-grained control. Hovewer, instead of using MAP_FIXED we
--   create our own address space upon virtual address space. If you
--   would like you could call this space as "fixed address space".
--
--   This solves a few problems:
--
--     * Page already in use. If you mmap one file at 0..x addresses and
--     want to map second file to x..y addresses using MAP_FIXED you
--     can get in troubles: page might be mapped already. Raw call to
--     mmap will silently unmap x..y addresses and then mmap our second
--     file. So here we have extra unmap we would like to avoid.
--
--     * Page boundaries. If you mmap one file at x..x+1 you could
--     not map next file to say addresses x+1..x+2.
--
--   Internally we make ordinary call to mmap to map a file and then
--   using /interval map/ we map fixed address space to virtual
--   address space. It takes TODO time in TODO cases.
--
--   Basically this library could be used when we need coalesce
--   several files in arbitrary way. We could map at any position as
--   long as offset + size fit in 'Int'.
--
--   For other details see:
--
--   > http://hackage.haskell.org/package/mmap
--   > man mmap
--
{-# LANGUAGE RecordWildCards #-}
module System.IO.MMap.Fixed
       ( -- * Intervals
         FixedOffset, FileOffset, FixedInterval, FileInterval
       , interval, fileInterval

         -- * Construction
       , Fixed, Bytes
       , System.IO.MMap.Fixed.empty, insertTo
       , coalesceFiles

         -- ** Specialized 'insertTo'
       , mmapTo, mallocTo
       , lookupRegion

         -- * Query
       , upperAddr

         -- * Access
       , viewBytes, readBytes, writeBytes
       , readElem, writeElem
       ) where

import Data.ByteString.Lazy as Lazy
import Data.ByteString.Lazy.Internal as Lazy
import Data.ByteString.Internal as B
import Data.List as L
import Data.Int
import Data.IntervalMap.Strict as M
import Data.IntervalMap.Interval
import System.IO.MMap
import Foreign


type FixedOffset = Int
type FileOffset  = Int64
type Size   = Int


type FileInterval  = (FileOffset, Size)
type FixedInterval = Interval FixedOffset


interval :: FixedOffset -> Size -> FixedInterval
interval off s = IntervalCO off (off + fromIntegral (max 0 s))
{-# INLINE interval #-}

fileInterval :: FileOffset -> Size -> FileInterval
fileInterval off s = (off, s)
{-# INLINE fileInterval #-}

intervalSize :: FixedInterval -> Size
intervalSize i = upperBound i - lowerBound i
{-# INLINE intervalSize #-}


type Bytes = (ForeignPtr Word8, Size)

type FixedMap = IntervalMap FixedOffset Bytes

newtype Fixed = Fixed { imap :: FixedMap }

instance Show Fixed where
  show = show . M.toList . imap


mapIM :: (FixedMap -> FixedMap) -> Fixed -> Fixed
mapIM f s = s { imap = f (imap s) }

empty :: Fixed
empty = Fixed M.empty

coalesceFiles :: [(FilePath, Int)] -> IO Fixed
coalesceFiles = go 0 System.IO.MMap.Fixed.empty
  where
    go _      s []       = return s
    go offset s ((path, bsize) : xs) = do
      s' <- mmapTo path (0, bsize) offset s
      go (offset + bsize) s' xs

upperAddr :: Fixed -> FixedOffset
upperAddr = upperBound . fst . findLast . imap

insertTo :: FixedInterval -> Bytes -> Fixed -> Fixed
insertTo fi mm = mapIM (M.insert fi mm)
{-# INLINE insertTo #-}

mmapTo :: FilePath -> FileInterval -> FixedOffset -> Fixed -> IO Fixed
mmapTo path mrange to s = do
  (fptr, offset, fsize) <- mmapFileForeignPtr path ReadWriteEx (Just mrange)

  let fixed  = interval to fsize
  let mmaped = (fptr, offset)

  return $ insertTo fixed mmaped s

mallocTo :: FixedInterval -> Fixed -> IO Fixed
mallocTo fi s = do
  let bsize = intervalSize fi
  fptr <- mallocForeignPtrBytes bsize
  return (insertTo fi (fptr, 0) s)

lookupRegion :: FixedOffset -> Fixed -> Maybe B.ByteString
lookupRegion offset Fixed {..} =
  case intersecting imap $ IntervalCO offset (succ offset) of
    [(i, (fptr, off))] -> let s = max 0 $ upperBound i - lowerBound i
                          in  Just $ fromForeignPtr fptr off s
    _         -> Nothing

-- | Note: this is unsafe operation.
viewBytes :: FixedInterval -> Fixed -> Lazy.ByteString
viewBytes fi s = fromChunks $ L.map mk $ (imap s `intersecting` fi)
  where
   mk (i, (fptr, offset)) =
     let dropB = max 0 (lowerBound fi - lowerBound i)
         dropT = max 0 (upperBound i  - upperBound fi)
         bsize = intervalSize i - (dropT + dropB)
     in fromForeignPtr fptr (offset + dropB) bsize


readBytes :: FixedInterval -> Fixed -> IO Lazy.ByteString
readBytes fi s = let c = Lazy.copy (viewBytes fi s) in mkCopy c >> return c
{-# INLINE readBytes #-}

writeBytes :: FixedInterval -> Lazy.ByteString -> Fixed -> IO ()
writeBytes fi bs s = bscpy (viewBytes fi s) bs
{-# INLINE writeBytes #-}

-- | Note: this operation takes O(log(files count)) time, if possible
-- use readBytes.
readElem :: Storable a => Fixed -> FixedOffset -> IO a
readElem s offset = go undefined
  where
    go :: Storable a => a -> IO a
    go dont_touch = do
      let bsize = sizeOf dont_touch
      let PS fptr off _ = Lazy.toStrict (viewBytes (interval offset bsize) s)
      withForeignPtr fptr $ \ ptr -> peekByteOff ptr off

writeElem :: Storable a => Fixed -> FixedOffset -> a -> IO ()
writeElem s offset x = do
  let bsize = sizeOf x
  let PS fptr off _ = Lazy.toStrict (viewBytes (interval offset bsize) s)
  withForeignPtr fptr $ \ptr -> pokeByteOff ptr off x


mkCopy :: Lazy.ByteString -> IO ()
mkCopy Empty = return ()
mkCopy (Chunk _ x) = mkCopy x

bscpy :: Lazy.ByteString -> Lazy.ByteString -> IO ()
bscpy (PS _ _ 0 `Chunk` dest_rest) src                         = bscpy dest_rest src
bscpy dest                         (PS _ _ 0 `Chunk` src_rest) = bscpy dest      src_rest
bscpy (PS dest_fptr dest_off dest_size `Chunk` dest_rest)
      (PS src_fptr  src_off  src_size  `Chunk` src_rest)
  = do let csize = min dest_size src_size
       withForeignPtr dest_fptr $ \dest_ptr ->
         withForeignPtr src_fptr $ \src_ptr ->
           memcpy (dest_ptr `advancePtr` dest_off)
                  (src_ptr  `advancePtr` src_off)
                  (fromIntegral csize) -- TODO memmove?
       bscpy (PS dest_fptr (dest_off + csize) (dest_size - csize) `Chunk` dest_rest)
             (PS src_fptr  (src_off  + csize) (src_size  - csize) `Chunk` src_rest)
bscpy _ _ = return ()