summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Text/XML/Stream/Render.hs357
-rw-r--r--Text/XML/Stream/Token.hs174
2 files changed, 531 insertions, 0 deletions
diff --git a/Text/XML/Stream/Render.hs b/Text/XML/Stream/Render.hs
new file mode 100644
index 00000000..c758acad
--- /dev/null
+++ b/Text/XML/Stream/Render.hs
@@ -0,0 +1,357 @@
1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE FlexibleContexts #-}
3-- | 'Enumeratee's to render XML 'Event's. Unlike libxml-enumerator and
4-- expat-enumerator, this module does not provide IO and ST variants, since the
5-- underlying rendering operations are pure functions.
6module Text.XML.Stream.Render
7 ( renderBuilder
8 , renderBuilderFlush
9 , renderBytes
10 , renderText
11 , RenderSettings
12 , def
13 , rsPretty
14 , rsNamespaces
15 , rsAttrOrder
16 , orderAttrs
17 , prettify
18 ) where
19
20import Data.XML.Types (Event (..), Content (..), Name (..))
21import Text.XML.Stream.Token
22import qualified Data.Text as T
23import Data.Text (Text)
24import Blaze.ByteString.Builder
25import Data.Conduit.Blaze (builderToByteString)
26import qualified Data.Map as Map
27import Data.Map (Map)
28import Data.Maybe (fromMaybe, mapMaybe)
29import Data.ByteString (ByteString)
30import Data.Default (Default (def))
31import qualified Data.Set as Set
32import Data.List (foldl')
33import Data.Conduit
34import qualified Data.Conduit.List as CL
35import qualified Data.Conduit.Text as CT
36import Data.Monoid (mempty)
37
38-- | Render a stream of 'Event's into a stream of 'ByteString's. This function
39-- wraps around 'renderBuilder' and 'builderToByteString', so it produces
40-- optimally sized 'ByteString's with minimal buffer copying.
41--
42-- The output is UTF8 encoded.
43renderBytes :: MonadUnsafeIO m => RenderSettings -> Conduit Event m ByteString
44renderBytes rs = renderBuilder rs =$= builderToByteString
45
46-- | Render a stream of 'Event's into a stream of 'ByteString's. This function
47-- wraps around 'renderBuilder', 'builderToByteString' and 'renderBytes', so it
48-- produces optimally sized 'ByteString's with minimal buffer copying.
49renderText :: (MonadThrow m, MonadUnsafeIO m)
50 => RenderSettings -> Conduit Event m Text
51renderText rs = renderBytes rs =$= CT.decode CT.utf8
52
53data RenderSettings = RenderSettings
54 { rsPretty :: Bool
55 , rsNamespaces :: [(Text, Text)]
56 -- ^ Defines some top level namespace definitions to be used, in the form
57 -- of (prefix, namespace). This has absolutely no impact on the meaning
58 -- of your documents, but can increase readability by moving commonly
59 -- used namespace declarations to the top level.
60 , rsAttrOrder :: Name -> Map.Map Name Text -> [(Name, Text)]
61 -- ^ Specify how to turn the unordered attributes used by the "Text.XML"
62 -- module into an ordered list.
63 }
64
65instance Default RenderSettings where
66 def = RenderSettings
67 { rsPretty = False
68 , rsNamespaces = []
69 , rsAttrOrder = const Map.toList
70 }
71
72-- | Convenience function to create an ordering function suitable for
73-- use as the value of 'rsAttrOrder'. The ordering function is created
74-- from an explicit ordering of the attributes, specified as a list of
75-- tuples, as follows: In each tuple, the first component is the
76-- 'Name' of an element, and the second component is a list of
77-- attributes names. When the given element is rendered, the
78-- attributes listed, when present, appear first in the given order,
79-- followed by any other attributes in arbitrary order. If an element
80-- does not appear, all of its attributes are rendered in arbitrary
81-- order.
82orderAttrs :: [(Name, [Name])] ->
83 Name -> Map Name Text -> [(Name, Text)]
84orderAttrs orderSpec = order
85 where
86 order elt attrMap =
87 let initialAttrs = fromMaybe [] $ lookup elt orderSpec
88 mkPair attr = fmap ((,) attr) $ Map.lookup attr attrMap
89 otherAttrMap =
90 Map.filterWithKey (const . not . (`elem` initialAttrs)) attrMap
91 in mapMaybe mkPair initialAttrs ++ Map.toAscList otherAttrMap
92
93-- | Render a stream of 'Event's into a stream of 'Builder's. Builders are from
94-- the blaze-builder package, and allow the create of optimally sized
95-- 'ByteString's with minimal buffer copying.
96renderBuilder :: Monad m => RenderSettings -> Conduit Event m Builder
97renderBuilder RenderSettings { rsPretty = True, rsNamespaces = n } = prettify =$= renderBuilder' n True
98renderBuilder RenderSettings { rsPretty = False, rsNamespaces = n } = renderBuilder' n False
99
100renderBuilder' :: Monad m => [(Text, Text)] -> Bool -> Conduit Event m Builder
101renderBuilder' namespaces0 isPretty = do
102 loop []
103 where
104 loop nslevels = await >>= maybe (return ()) (go nslevels)
105
106 go nslevels e =
107 case e of
108 EventBeginElement n1 as -> do
109 mnext <- CL.peek
110 isClosed <-
111 case mnext of
112 Just (EventEndElement n2) | n1 == n2 -> do
113 CL.drop 1
114 return True
115 _ -> return False
116 let (token, nslevels') = mkBeginToken isPretty isClosed namespaces0 nslevels n1 as
117 yield token
118 loop nslevels'
119 _ -> do
120 let (token, nslevels') = eventToToken nslevels e
121 yield token
122 loop nslevels'
123
124renderBuilderFlush :: Monad m => RenderSettings -> Conduit (Flush Event) m (Flush Builder)
125-- pretty printing not implemented.
126-- renderBuilderFlush RenderSettings { rsPretty = True, rsNamespaces = n } = flushPassThrough prettify =$= renderBuilder' n True
127renderBuilderFlush RenderSettings { rsPretty = False, rsNamespaces = n } = renderBuilderFlush' n False
128
129renderBuilderFlush' :: Monad m => [(Text, Text)] -> Bool -> Conduit (Flush Event) m (Flush Builder)
130renderBuilderFlush' namespaces0 isPretty = do
131 loop []
132 where
133 loop nslevels = await >>= maybe (return ()) (go nslevels)
134
135 go nslevels fe =
136 case fe of
137 Chunk e@(EventBeginElement n1 as) -> do
138 mnext <- CL.peek
139 isClosed <-
140 case mnext of
141 Just (Chunk (EventEndElement n2)) | n1 == n2 -> do
142 CL.drop 1
143 return True
144 _ -> return False
145 let (token, nslevels') = mkBeginToken isPretty isClosed namespaces0 nslevels n1 as
146 yield (Chunk token)
147 loop nslevels'
148 Chunk e -> do
149 let (token, nslevels') = eventToToken nslevels e
150 yield (Chunk token)
151 loop nslevels'
152 Flush -> yield Flush >> loop nslevels
153
154eventToToken :: Stack -> Event -> (Builder, [NSLevel])
155eventToToken s EventBeginDocument =
156 (tokenToBuilder $ TokenBeginDocument
157 [ ("version", [ContentText "1.0"])
158 , ("encoding", [ContentText "UTF-8"])
159 ]
160 , s)
161eventToToken s EventEndDocument = (mempty, s)
162eventToToken s (EventInstruction i) = (tokenToBuilder $ TokenInstruction i, s)
163eventToToken s (EventBeginDoctype n meid) = (tokenToBuilder $ TokenDoctype n meid [], s)
164eventToToken s EventEndDoctype = (mempty, s)
165eventToToken s (EventCDATA t) = (tokenToBuilder $ TokenCDATA t, s)
166eventToToken s (EventEndElement name) =
167 (tokenToBuilder $ TokenEndElement $ nameToTName sl name, s')
168 where
169 (sl:s') = s
170eventToToken s (EventContent c) = (tokenToBuilder $ TokenContent c, s)
171eventToToken s (EventComment t) = (tokenToBuilder $ TokenComment t, s)
172eventToToken _ EventBeginElement{} = error "eventToToken on EventBeginElement" -- mkBeginToken False s name attrs
173
174type Stack = [NSLevel]
175
176nameToTName :: NSLevel -> Name -> TName
177nameToTName _ (Name name _ (Just pref))
178 | pref == "xml" = TName (Just "xml") name
179nameToTName _ (Name name Nothing _) = TName Nothing name -- invariant that this is true
180nameToTName (NSLevel def' sl) (Name name (Just ns) _)
181 | def' == Just ns = TName Nothing name
182 | otherwise =
183 case Map.lookup ns sl of
184 Nothing -> error "nameToTName"
185 Just pref -> TName (Just pref) name
186
187mkBeginToken :: Bool -- ^ pretty print attributes?
188 -> Bool -- ^ self closing?
189 -> [(Text, Text)] -- ^ namespaces to apply to top-level
190 -> Stack
191 -> Name
192 -> [(Name, [Content])]
193 -> (Builder, Stack)
194mkBeginToken isPretty isClosed namespaces0 s name attrs =
195 (tokenToBuilder $ TokenBeginElement tname tattrs3 isClosed indent,
196 if isClosed then s else sl3 : s)
197 where
198 indent = if isPretty then 2 + 4 * length s else 0
199 prevsl = case s of
200 [] -> NSLevel Nothing Map.empty
201 sl':_ -> sl'
202 (sl1, tname, tattrs1) = newElemStack prevsl name
203 (sl2, tattrs2) = foldr newAttrStack (sl1, tattrs1) $ nubAttrs attrs
204 (sl3, tattrs3) =
205 case s of
206 [] -> (sl2 { prefixes = Map.union (prefixes sl2) $ Map.fromList namespaceSL }, namespaceAttrs ++ tattrs2)
207 _ -> (sl2, tattrs2)
208
209 (namespaceSL, namespaceAttrs) = unzip $ mapMaybe unused namespaces0
210 unused (k, v) =
211 case lookup k' tattrs2 of
212 Just{} -> Nothing
213 Nothing -> Just ((v, k), (k', v'))
214 where
215 k' = TName (Just "xmlns") k
216 v' = [ContentText v]
217
218newElemStack :: NSLevel -> Name -> (NSLevel, TName, [TAttribute])
219newElemStack nsl@(NSLevel def' _) (Name local ns _)
220 | def' == ns = (nsl, TName Nothing local, [])
221newElemStack (NSLevel _ nsmap) (Name local Nothing _) =
222 (NSLevel Nothing nsmap, TName Nothing local, [(TName Nothing "xmlns", [])])
223newElemStack (NSLevel _ nsmap) (Name local (Just ns) Nothing) =
224 (NSLevel (Just ns) nsmap, TName Nothing local, [(TName Nothing "xmlns", [ContentText ns])])
225newElemStack (NSLevel def' nsmap) (Name local (Just ns) (Just pref)) =
226 case Map.lookup ns nsmap of
227 Just pref'
228 | pref == pref' ->
229 ( NSLevel def' nsmap
230 , TName (Just pref) local
231 , []
232 )
233 _ -> ( NSLevel def' nsmap'
234 , TName (Just pref) local
235 , [(TName (Just "xmlns") pref, [ContentText ns])]
236 )
237 where
238 nsmap' = Map.insert ns pref nsmap
239
240newAttrStack :: (Name, [Content]) -> (NSLevel, [TAttribute]) -> (NSLevel, [TAttribute])
241newAttrStack (name, value) (NSLevel def' nsmap, attrs) =
242 (NSLevel def' nsmap', addNS $ (tname, value) : attrs)
243 where
244 (nsmap', tname, addNS) =
245 case name of
246 Name local Nothing _ -> (nsmap, TName Nothing local, id)
247 Name local (Just ns) mpref ->
248 let ppref = fromMaybe "ns" mpref
249 (pref, addNS') = getPrefix ppref nsmap ns
250 in (Map.insert ns pref nsmap, TName (Just pref) local, addNS')
251
252getPrefix :: Text -> Map Text Text -> Text -> (Text, [TAttribute] -> [TAttribute])
253getPrefix _ _ "http://www.w3.org/XML/1998/namespace" = ("xml", id)
254getPrefix ppref nsmap ns =
255 case Map.lookup ns nsmap of
256 Just pref -> (pref, id)
257 Nothing ->
258 let pref = findUnused ppref $ Map.elems nsmap
259 in (pref, (:) (TName (Just "xmlns") pref, [ContentText ns]))
260 where
261 findUnused x xs
262 | x `elem` xs = findUnused (x `T.snoc` '_') xs
263 | otherwise = x
264
265-- | Convert a stream of 'Event's into a prettified one, adding extra
266-- whitespace. Note that this can change the meaning of your XML.
267prettify :: Monad m => Conduit Event m Event
268prettify = prettify' 0
269
270prettify' :: Monad m => Int -> Conduit Event m Event
271prettify' level =
272 await >>= maybe (return ()) go
273 where
274 go e@EventBeginDocument = do
275 yield e
276 yield $ EventContent $ ContentText "\n"
277 prettify' level
278 go e@EventBeginElement{} = do
279 yield before
280 yield e
281 mnext <- CL.peek
282 case mnext of
283 Just next@EventEndElement{} -> do
284 CL.drop 1
285 yield next
286 yield after
287 prettify' level
288 _ -> do
289 yield after
290 prettify' $ level + 1
291 go e@EventEndElement{} = do
292 let level' = max 0 $ level - 1
293 yield $ before' level'
294 yield e
295 yield after
296 prettify' level'
297 go (EventContent c) = do
298 cs <- takeContents (c:)
299 let cs' = mapMaybe normalize cs
300 case cs' of
301 [] -> return ()
302 _ -> do
303 yield before
304 mapM_ (yield . EventContent) cs'
305 yield after
306 prettify' level
307 go (EventCDATA t) = go $ EventContent $ ContentText t
308 go e@EventInstruction{} = do
309 yield before
310 yield e
311 yield after
312 prettify' level
313 go (EventComment t) = do
314 yield before
315 yield $ EventComment $ T.concat
316 [ " "
317 , T.unwords $ T.words t
318 , " "
319 ]
320 yield after
321 prettify' level
322
323 go e@EventEndDocument = yield e >> prettify' level
324 go e@EventBeginDoctype{} = yield e >> prettify' level
325 go e@EventEndDoctype{} = yield e >> yield after >> prettify' level
326
327 takeContents front = do
328 me <- CL.peek
329 case me of
330 Just (EventContent c) -> do
331 CL.drop 1
332 takeContents $ front . (c:)
333 Just (EventCDATA t) -> do
334 CL.drop 1
335 takeContents $ front . (ContentText t:)
336 _ -> return $ front []
337
338 normalize (ContentText t)
339 | T.null t' = Nothing
340 | otherwise = Just $ ContentText t'
341 where
342 t' = T.unwords $ T.words t
343 normalize c = Just c
344
345 before = EventContent $ ContentText $ T.replicate level " "
346 before' l = EventContent $ ContentText $ T.replicate l " "
347 after = EventContent $ ContentText "\n"
348
349nubAttrs :: [(Name, v)] -> [(Name, v)]
350nubAttrs orig =
351 front []
352 where
353 (front, _) = foldl' go (id, Set.empty) orig
354 go (dlist, used) (k, v)
355 | k `Set.member` used = (dlist, used)
356 | otherwise = (dlist . ((k, v):), Set.insert k used)
357
diff --git a/Text/XML/Stream/Token.hs b/Text/XML/Stream/Token.hs
new file mode 100644
index 00000000..40f5a52b
--- /dev/null
+++ b/Text/XML/Stream/Token.hs
@@ -0,0 +1,174 @@
1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE FlexibleContexts #-}
3module Text.XML.Stream.Token
4 ( tokenToBuilder
5 , TName (..)
6 , Token (..)
7 , TAttribute
8 , NSLevel (..)
9 ) where
10
11import Data.XML.Types (Instruction (..), Content (..), ExternalID (..))
12import qualified Data.Text as T
13import Data.Text (Text)
14import Data.String (IsString (fromString))
15import Blaze.ByteString.Builder
16 (Builder, fromByteString, writeByteString, copyByteString)
17import Blaze.ByteString.Builder.Internal.Write (fromWriteList)
18import Blaze.ByteString.Builder.Char.Utf8 (writeChar, fromText)
19import Data.Monoid (mconcat, mempty, mappend)
20import Data.ByteString.Char8 ()
21import Data.Map (Map)
22import qualified Blaze.ByteString.Builder.Char8 as BC8
23import qualified Data.Set as Set
24import Data.List (foldl')
25import Control.Arrow (first)
26
27oneSpace :: Builder
28oneSpace = copyByteString " "
29
30data Token = TokenBeginDocument [TAttribute]
31 | TokenInstruction Instruction
32 | TokenBeginElement TName [TAttribute] Bool Int -- ^ indent
33 | TokenEndElement TName
34 | TokenContent Content
35 | TokenComment Text
36 | TokenDoctype Text (Maybe ExternalID) [(Text, Text)]
37 | TokenCDATA Text
38 deriving Show
39tokenToBuilder :: Token -> Builder
40tokenToBuilder (TokenBeginDocument attrs) =
41 fromByteString "<?xml"
42 `mappend` foldAttrs oneSpace attrs (fromByteString "?>")
43tokenToBuilder (TokenInstruction (Instruction target data_)) = mconcat
44 [ fromByteString "<?"
45 , fromText target
46 , fromByteString " "
47 , fromText data_
48 , fromByteString "?>"
49 ]
50tokenToBuilder (TokenBeginElement name attrs' isEmpty indent) =
51 copyByteString "<"
52 `mappend` tnameToText name
53 `mappend` foldAttrs
54 (if indent == 0 || lessThan3 attrs
55 then oneSpace
56 else BC8.fromString ('\n' : replicate indent ' '))
57 attrs
58 (if isEmpty then fromByteString "/>" else fromByteString ">")
59 where
60 attrs = nubAttrs $ map (first splitTName) attrs'
61 lessThan3 [] = True
62 lessThan3 [_] = True
63 lessThan3 [_, _] = True
64 lessThan3 _ = False
65tokenToBuilder (TokenEndElement name) = mconcat
66 [ fromByteString "</"
67 , tnameToText name
68 , fromByteString ">"
69 ]
70tokenToBuilder (TokenContent c) = contentToText c
71tokenToBuilder (TokenCDATA t) =
72 copyByteString "<![CDATA["
73 `mappend` fromText t
74 `mappend` copyByteString "]]>"
75tokenToBuilder (TokenComment t) = mconcat [fromByteString "<!--", fromText t, fromByteString "-->"]
76tokenToBuilder (TokenDoctype name eid _) = mconcat
77 [ fromByteString "<!DOCTYPE "
78 , fromText name
79 , go eid
80 , fromByteString ">"
81 ]
82 where
83 go Nothing = mempty
84 go (Just (SystemID uri)) = mconcat
85 [ fromByteString " SYSTEM \""
86 , fromText uri
87 , fromByteString "\""
88 ]
89 go (Just (PublicID pid uri)) = mconcat
90 [ fromByteString " PUBLIC \""
91 , fromText pid
92 , fromByteString "\" \""
93 , fromText uri
94 , fromByteString "\""
95 ]
96
97data TName = TName (Maybe Text) Text
98 deriving (Show, Eq, Ord)
99
100tnameToText :: TName -> Builder
101tnameToText (TName Nothing name) = fromText name
102tnameToText (TName (Just prefix) name) = mconcat [fromText prefix, fromByteString ":", fromText name]
103
104contentToText :: Content -> Builder
105contentToText (ContentText t) =
106 fromWriteList go $ T.unpack t
107 where
108 go '<' = writeByteString "&lt;"
109 go '>' = writeByteString "&gt;"
110 go '&' = writeByteString "&amp;"
111 -- Not escaping quotes, since this is only called outside of attributes
112 go c = writeChar c
113contentToText (ContentEntity e) = mconcat
114 [ fromByteString "&"
115 , fromText e
116 , fromByteString ";"
117 ]
118
119type TAttribute = (TName, [Content])
120
121foldAttrs :: Builder -- ^ before
122 -> [TAttribute]
123 -> Builder
124 -> Builder
125foldAttrs before attrs rest' =
126 foldr go rest' attrs
127 where
128 go (key, val) rest =
129 before
130 `mappend` tnameToText key
131 `mappend` copyByteString "=\""
132 `mappend` foldr go' (fromByteString "\"" `mappend` rest) val
133 go' (ContentText t) rest =
134 fromWriteList h (T.unpack t) `mappend` rest
135 where
136 h '<' = writeByteString "&lt;"
137 h '>' = writeByteString "&gt;"
138 h '&' = writeByteString "&amp;"
139 h '"' = writeByteString "&quot;"
140 -- Not escaping single quotes, since our attributes are always double
141 -- quoted
142 h c = writeChar c
143 go' (ContentEntity t) rest =
144 fromByteString "&"
145 `mappend` fromText t
146 `mappend` fromByteString ";"
147 `mappend` rest
148
149instance IsString TName where
150 fromString = TName Nothing . T.pack
151
152data NSLevel = NSLevel
153 { defaultNS :: Maybe Text
154 , prefixes :: Map Text Text
155 }
156 deriving Show
157
158nubAttrs :: [TAttribute] -> [TAttribute]
159nubAttrs orig =
160 front []
161 where
162 (front, _) = foldl' go (id, Set.empty) orig
163 go (dlist, used) (k, v)
164 | k `Set.member` used = (dlist, used)
165 | otherwise = (dlist . ((k, v):), Set.insert k used)
166
167splitTName :: TName -> TName
168splitTName x@(TName Just{} _) = x
169splitTName x@(TName Nothing t)
170 | T.null b = x
171 | otherwise = TName (Just a) $ T.drop 1 b
172 where
173 (a, b) = T.break (== ':') t
174