diff options
Diffstat (limited to 'src/LambdaCube/Compiler/Parser.hs')
-rw-r--r-- | src/LambdaCube/Compiler/Parser.hs | 345 |
1 files changed, 33 insertions, 312 deletions
diff --git a/src/LambdaCube/Compiler/Parser.hs b/src/LambdaCube/Compiler/Parser.hs index 2d7eb49f..31012fdd 100644 --- a/src/LambdaCube/Compiler/Parser.hs +++ b/src/LambdaCube/Compiler/Parser.hs | |||
@@ -7,8 +7,6 @@ | |||
7 | {-# LANGUAGE OverloadedStrings #-} | 7 | {-# LANGUAGE OverloadedStrings #-} |
8 | {-# LANGUAGE DeriveFunctor #-} | 8 | {-# LANGUAGE DeriveFunctor #-} |
9 | {-# LANGUAGE ScopedTypeVariables #-} | 9 | {-# LANGUAGE ScopedTypeVariables #-} |
10 | {-# OPTIONS_GHC -fno-warn-orphans #-} -- instance NFData SourcePos | ||
11 | -- {-# OPTIONS_GHC -O0 #-} | ||
12 | module LambdaCube.Compiler.Parser | 10 | module LambdaCube.Compiler.Parser |
13 | {- todo ( definitions | 11 | {- todo ( definitions |
14 | , extensions | 12 | , extensions |
@@ -28,8 +26,6 @@ import Data.Maybe | |||
28 | import Data.List | 26 | import Data.List |
29 | import Data.Char | 27 | import Data.Char |
30 | import Data.String | 28 | import Data.String |
31 | import Data.Set (Set) | ||
32 | import qualified Data.Set as Set | ||
33 | import qualified Data.Map as Map | 29 | import qualified Data.Map as Map |
34 | 30 | ||
35 | import Control.Monad.Except | 31 | import Control.Monad.Except |
@@ -38,19 +34,16 @@ import Control.Monad.Writer | |||
38 | import Control.Monad.State | 34 | import Control.Monad.State |
39 | import Control.Arrow hiding ((<+>)) | 35 | import Control.Arrow hiding ((<+>)) |
40 | import Control.Applicative | 36 | import Control.Applicative |
41 | import Control.DeepSeq | ||
42 | 37 | ||
43 | import Text.Parsec hiding (label, Empty, State, (<|>), many, try) | 38 | import Text.Parsec hiding (label, Empty, State, (<|>), many, try) |
44 | import qualified Text.Parsec as Pa | 39 | import qualified Text.Parsec as Pa |
45 | import qualified Text.Parsec.Token as Pa | ||
46 | import qualified Text.ParserCombinators.Parsec.Language as Pa | ||
47 | import Text.Parsec.Pos | 40 | import Text.Parsec.Pos |
48 | import Text.Parsec.Indentation hiding (Any) | 41 | import Text.Parsec.Indentation hiding (Any) |
49 | import Text.Parsec.Indentation.Char | 42 | import Text.Parsec.Indentation.Char |
50 | 43 | ||
51 | import qualified LambdaCube.Compiler.Pretty as P | 44 | import qualified LambdaCube.Compiler.Pretty as P |
52 | import LambdaCube.Compiler.Pretty hiding (Doc, braces, parens) | 45 | import LambdaCube.Compiler.Pretty hiding (Doc, braces, parens) |
53 | import LambdaCube.Compiler.Token | 46 | import LambdaCube.Compiler.Lexer |
54 | 47 | ||
55 | -------------------------------------------------------------------------------- utils | 48 | -------------------------------------------------------------------------------- utils |
56 | 49 | ||
@@ -78,309 +71,7 @@ traceD x = if debug then trace_ x else id | |||
78 | 71 | ||
79 | debug = False--True--tr | 72 | debug = False--True--tr |
80 | 73 | ||
81 | -------------------------------------------------------------------------------- parser utils | 74 | try = try_ |
82 | |||
83 | -- see http://blog.ezyang.com/2014/05/parsec-try-a-or-b-considered-harmful/comment-page-1/#comment-6602 | ||
84 | try s m = Pa.try m <?> s | ||
85 | |||
86 | -- n, m >= 1, n < m | ||
87 | manyNM n m p = do | ||
88 | xs <- many1 p | ||
89 | let lxs = length xs | ||
90 | unless (n <= lxs && lxs <= m) . fail $ unwords ["manyNM", show n, show m, "found", show lxs, "occurences."] | ||
91 | return xs | ||
92 | |||
93 | -------------------------------------------------------------------------------- parser type | ||
94 | |||
95 | type P = ParsecT (IndentStream (CharIndentStream String)) SourcePos InnerP | ||
96 | type InnerP = WriterT [PostponedCheck] (Reader (DesugarInfo, Namespace)) | ||
97 | |||
98 | type PostponedCheck = Maybe String | ||
99 | |||
100 | type DesugarInfo = (FixityMap, ConsMap) | ||
101 | |||
102 | type ConsMap = Map.Map SName{-constructor name-} | ||
103 | (Either ((SName{-type name-}, Int{-num of indices-}), [(SName, Int)]{-constructors with arities-}) | ||
104 | Int{-arity-}) | ||
105 | |||
106 | dsInfo :: P DesugarInfo | ||
107 | dsInfo = asks fst | ||
108 | |||
109 | namespace :: P Namespace | ||
110 | namespace = asks snd | ||
111 | |||
112 | -------------------------------------------------------------------------------- lexing | ||
113 | |||
114 | {-# NOINLINE lexer #-} | ||
115 | lexer :: Pa.GenTokenParser (IndentStream (CharIndentStream String)) SourcePos InnerP | ||
116 | lexer = makeTokenParser lexeme $ makeIndentLanguageDef style | ||
117 | where | ||
118 | style = Pa.LanguageDef | ||
119 | { Pa.commentStart = Pa.commentStart Pa.haskellDef | ||
120 | , Pa.commentEnd = Pa.commentEnd Pa.haskellDef | ||
121 | , Pa.commentLine = Pa.commentLine Pa.haskellDef | ||
122 | , Pa.nestedComments = Pa.nestedComments Pa.haskellDef | ||
123 | , Pa.identStart = letter <|> char '_' -- '_' is included also | ||
124 | , Pa.identLetter = alphaNum <|> oneOf "_'#" | ||
125 | , Pa.opStart = Pa.opLetter style | ||
126 | , Pa.opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~" | ||
127 | , Pa.reservedOpNames = Pa.reservedOpNames Pa.haskellDef | ||
128 | , Pa.reservedNames = Pa.reservedNames Pa.haskellDef | ||
129 | , Pa.caseSensitive = Pa.caseSensitive Pa.haskellDef | ||
130 | } | ||
131 | |||
132 | lexeme p = p <* (getPosition >>= setState >> whiteSpace) | ||
133 | |||
134 | parens = Pa.parens lexer | ||
135 | braces = Pa.braces lexer | ||
136 | brackets = Pa.brackets lexer | ||
137 | commaSep = Pa.commaSep lexer | ||
138 | commaSep1 = Pa.commaSep1 lexer | ||
139 | --dot = Pa.dot lexer | ||
140 | --comma = Pa.comma lexer | ||
141 | colon = Pa.colon lexer | ||
142 | natural = Pa.natural lexer | ||
143 | --integer = Pa.integer lexer | ||
144 | float = Pa.float lexer | ||
145 | charLiteral = Pa.charLiteral lexer | ||
146 | stringLiteral = Pa.stringLiteral lexer | ||
147 | whiteSpace = Pa.whiteSpace lexer | ||
148 | operator = Pa.operator lexer | ||
149 | reserved = Pa.reserved lexer | ||
150 | reservedOp = Pa.reservedOp lexer | ||
151 | identifier = Pa.identifier lexer | ||
152 | |||
153 | |||
154 | -------------------------------------------------------------------------------- names | ||
155 | |||
156 | type SName = String | ||
157 | |||
158 | caseName (c:cs) = toLower c: cs ++ "Case" | ||
159 | |||
160 | pattern MatchName cs <- (getMatchName -> Just cs) where MatchName = matchName | ||
161 | |||
162 | matchName cs = "match" ++ cs | ||
163 | getMatchName ('m':'a':'t':'c':'h':cs) = Just cs | ||
164 | getMatchName _ = Nothing | ||
165 | |||
166 | |||
167 | -------------------------------------------------------------------------------- source infos | ||
168 | |||
169 | instance NFData SourcePos where | ||
170 | rnf x = x `seq` () | ||
171 | |||
172 | data Range = Range SourcePos SourcePos | ||
173 | deriving (Eq, Ord) | ||
174 | |||
175 | instance NFData Range where | ||
176 | rnf (Range a b) = rnf a `seq` rnf b `seq` () | ||
177 | |||
178 | instance PShow Range where | ||
179 | pShowPrec _ (Range b e) | sourceName b == sourceName e = text (sourceName b) <+> f b <> "-" <> f e | ||
180 | where | ||
181 | f x = pShow (sourceLine x) <> ":" <> pShow (sourceColumn x) | ||
182 | |||
183 | joinRange :: Range -> Range -> Range | ||
184 | joinRange (Range b e) (Range b' e') = Range (min b b') (max e e') | ||
185 | |||
186 | data SI | ||
187 | = NoSI (Set String) -- no source info, attached debug info | ||
188 | | RangeSI Range | ||
189 | |||
190 | instance Show SI where show _ = "SI" | ||
191 | instance Eq SI where _ == _ = True | ||
192 | instance Ord SI where _ `compare` _ = EQ | ||
193 | |||
194 | instance Monoid SI where | ||
195 | mempty = NoSI Set.empty | ||
196 | mappend (RangeSI r1) (RangeSI r2) = RangeSI (joinRange r1 r2) | ||
197 | mappend (NoSI ds1) (NoSI ds2) = NoSI (ds1 `Set.union` ds2) | ||
198 | mappend r@RangeSI{} _ = r | ||
199 | mappend _ r@RangeSI{} = r | ||
200 | |||
201 | instance PShow SI where | ||
202 | pShowPrec _ (NoSI ds) = hsep $ map pShow $ Set.toList ds | ||
203 | pShowPrec _ (RangeSI r) = pShow r | ||
204 | |||
205 | showSI_ _ (NoSI ds) = unwords $ Set.toList ds | ||
206 | showSI_ source (RangeSI (Range s e)) = show str | ||
207 | where | ||
208 | startLine = sourceLine s - 1 | ||
209 | endline = sourceLine e - if sourceColumn e == 1 then 1 else 0 | ||
210 | len = endline - startLine | ||
211 | str = vcat $ text (show s <> ":"){- <+> "-" <+> text (show e)-}: | ||
212 | map text (take len $ drop startLine $ lines source) | ||
213 | ++ [text $ replicate (sourceColumn s - 1) ' ' ++ replicate (sourceColumn e - sourceColumn s) '^' | len == 1] | ||
214 | |||
215 | -- TODO: remove | ||
216 | validSI RangeSI{} = True | ||
217 | validSI _ = False | ||
218 | |||
219 | debugSI a = NoSI (Set.singleton a) | ||
220 | |||
221 | si@(RangeSI r) `validate` xs | all validSI xs && r `notElem` [r | RangeSI r <- xs] = si | ||
222 | _ `validate` _ = mempty | ||
223 | |||
224 | class SourceInfo si where | ||
225 | sourceInfo :: si -> SI | ||
226 | |||
227 | instance SourceInfo SI where | ||
228 | sourceInfo = id | ||
229 | |||
230 | instance SourceInfo si => SourceInfo [si] where | ||
231 | sourceInfo = foldMap sourceInfo | ||
232 | |||
233 | instance SourceInfo ParPat where | ||
234 | sourceInfo (ParPat ps) = sourceInfo ps | ||
235 | |||
236 | instance SourceInfo Pat where | ||
237 | sourceInfo = \case | ||
238 | PVar (si,_) -> si | ||
239 | PCon (si,_) ps -> si <> sourceInfo ps | ||
240 | ViewPat e ps -> sourceInfo e <> sourceInfo ps | ||
241 | PatType ps e -> sourceInfo ps <> sourceInfo e | ||
242 | |||
243 | instance SourceInfo (SExp' a) where | ||
244 | sourceInfo = \case | ||
245 | SGlobal (si, _) -> si | ||
246 | SBind si _ _ e1 e2 -> si | ||
247 | SApp si _ e1 e2 -> si | ||
248 | SLet _ e1 e2 -> sourceInfo e1 <> sourceInfo e2 | ||
249 | SVar (si, _) _ -> si | ||
250 | STyped si _ -> si | ||
251 | SLit si _ -> si | ||
252 | |||
253 | class SetSourceInfo a where | ||
254 | setSI :: SI -> a -> a | ||
255 | |||
256 | instance SetSourceInfo (SExp' a) where | ||
257 | setSI si = \case | ||
258 | SBind _ a b c d -> SBind si a b c d | ||
259 | SApp _ a b c -> SApp si a b c | ||
260 | SLet le a b -> SLet le a b | ||
261 | SVar (_, n) i -> SVar (si, n) i | ||
262 | STyped _ t -> STyped si t | ||
263 | SGlobal (_, n) -> SGlobal (si, n) | ||
264 | SLit _ l -> SLit si l | ||
265 | |||
266 | appRange :: P (SI -> a) -> P a | ||
267 | appRange p = (\p1 a p2 -> a $ RangeSI $ Range p1 p2) <$> getPosition <*> p <*> getState | ||
268 | |||
269 | withRange :: (SI -> a -> b) -> P a -> P b | ||
270 | withRange f p = appRange $ flip f <$> p | ||
271 | |||
272 | infix 9 `withRange` | ||
273 | |||
274 | type SIName = (SI, SName) | ||
275 | |||
276 | parseSIName :: P String -> P SIName | ||
277 | parseSIName = withRange (,) | ||
278 | |||
279 | -------------------------------------------------------------------------------- namespace handling | ||
280 | |||
281 | data Level = TypeLevel | ExpLevel | ||
282 | deriving (Eq, Show) | ||
283 | |||
284 | data Namespace = Namespace | ||
285 | { namespaceLevel :: Maybe Level | ||
286 | , constructorNamespace :: Bool -- True means that the case of the first letter of identifiers matters | ||
287 | } | ||
288 | deriving (Show) | ||
289 | |||
290 | tick :: Namespace -> SName -> SName | ||
291 | tick = (\case TypeLevel -> switchTick; _ -> id) . fromMaybe ExpLevel . namespaceLevel | ||
292 | |||
293 | switchTick ('\'': n) = n | ||
294 | switchTick n = '\'': n | ||
295 | |||
296 | modifyLevel f = local $ second $ \(Namespace l p) -> Namespace (f <$> l) p | ||
297 | |||
298 | typeNS, expNS, switchNS :: P a -> P a | ||
299 | typeNS = modifyLevel $ const TypeLevel | ||
300 | expNS = modifyLevel $ const ExpLevel | ||
301 | switchNS = modifyLevel $ \case ExpLevel -> TypeLevel; TypeLevel -> ExpLevel | ||
302 | |||
303 | -------------------------------------------------------------------------------- identifiers | ||
304 | |||
305 | check msg p m = try msg $ mfilter p m | ||
306 | |||
307 | firstCaseChar ('\'': c: _) = c | ||
308 | firstCaseChar (c: _) = c | ||
309 | |||
310 | upperCase, lowerCase, symbols, colonSymbols :: P SName | ||
311 | --upperCase NonTypeNamespace = mzero -- todo | ||
312 | upperCase = namespace >>= \ns -> (if constructorNamespace ns then check "uppercase ident" (isUpper . firstCaseChar) else id) $ tick ns <$> (identifier <|> try "tick ident" (('\'':) <$ char '\'' <*> identifier)) | ||
313 | lowerCase = namespace >>= \ns -> (if constructorNamespace ns then check "lowercase ident" (isLower . firstCaseChar) else id) identifier | ||
314 | <|> try "underscore ident" (('_':) <$ char '_' <*> identifier) | ||
315 | symbols = check "symbols" ((/=':') . head) operator | ||
316 | colonSymbols = "Cons" <$ reservedOp ":" <|> check "symbols" ((==':') . head) operator | ||
317 | |||
318 | moduleName = {-qualified_ todo-} expNS upperCase | ||
319 | patVar = lowerCase <|> "" <$ reserved "_" | ||
320 | --qIdent = {-qualified_ todo-} (lowerCase <|> upperCase) | ||
321 | backquotedIdent = try "backquoted ident" $ lexeme $ char '`' *> ((:) <$> satisfy isAlpha <*> many (satisfy isAlphaNum)) <* char '`' | ||
322 | operatorT = symbols <|> colonSymbols <|> backquotedIdent | ||
323 | varId = lowerCase <|> parens operatorT | ||
324 | |||
325 | {- | ||
326 | qualified_ id = do | ||
327 | q <- try "qualification" $ upperCase' <* dot | ||
328 | (N t qs n i) <- qualified_ id | ||
329 | return $ N t (q:qs) n i | ||
330 | <|> | ||
331 | id | ||
332 | where | ||
333 | upperCase' = (:) <$> satisfy isUpper <*> many (satisfy isAlphaNum) | ||
334 | -} | ||
335 | |||
336 | -------------------------------------------------------------------------------- fixity handling | ||
337 | |||
338 | data FixityDef = Infix | InfixL | InfixR deriving (Show) | ||
339 | type Fixity = (FixityDef, Int) | ||
340 | type MFixity = Maybe Fixity | ||
341 | type FixityMap = Map.Map SName Fixity | ||
342 | |||
343 | calcPrec | ||
344 | :: (Show e, Show f) | ||
345 | => (f -> e -> e -> e) | ||
346 | -> (f -> Fixity) | ||
347 | -> e | ||
348 | -> [(f, e)] | ||
349 | -> e | ||
350 | calcPrec app getFixity e = compileOps [((Infix, -1), undefined, e)] | ||
351 | where | ||
352 | compileOps [(_, _, e)] [] = e | ||
353 | compileOps acc [] = compileOps (shrink acc) [] | ||
354 | compileOps acc@((p, g, e1): ee) es_@((op, e'): es) = case compareFixity (pr, op) (p, g) of | ||
355 | Right GT -> compileOps ((pr, op, e'): acc) es | ||
356 | Right LT -> compileOps (shrink acc) es_ | ||
357 | Left err -> error err | ||
358 | where | ||
359 | pr = getFixity op | ||
360 | |||
361 | shrink ((_, op, e): (pr, op', e'): es) = (pr, op', app op e' e): es | ||
362 | |||
363 | compareFixity ((dir, i), op) ((dir', i'), op') | ||
364 | | i > i' = Right GT | ||
365 | | i < i' = Right LT | ||
366 | | otherwise = case (dir, dir') of | ||
367 | (InfixL, InfixL) -> Right LT | ||
368 | (InfixR, InfixR) -> Right GT | ||
369 | _ -> Left $ "fixity error:" ++ show (op, op') | ||
370 | |||
371 | parseFixityDecl :: P [Stmt] | ||
372 | parseFixityDecl = do | ||
373 | dir <- Infix <$ reserved "infix" | ||
374 | <|> InfixL <$ reserved "infixl" | ||
375 | <|> InfixR <$ reserved "infixr" | ||
376 | localIndentation Gt $ do | ||
377 | i <- fromIntegral <$> natural | ||
378 | ns <- commaSep1 (parseSIName operatorT) | ||
379 | return $ PrecDef <$> ns <*> pure (dir, i) | ||
380 | |||
381 | getFixity :: DesugarInfo -> SName -> Fixity | ||
382 | getFixity (fm, _) n = fromMaybe (InfixL, 9) $ Map.lookup n fm | ||
383 | |||
384 | 75 | ||
385 | -------------------------------------------------------------------------------- literals | 76 | -------------------------------------------------------------------------------- literals |
386 | 77 | ||
@@ -489,6 +180,26 @@ downToS n m = map (SVar (debugSI "20", ".ds")) [n+m-1, n+m-2..n] | |||
489 | 180 | ||
490 | xSLabelEnd = id --SLabelEnd | 181 | xSLabelEnd = id --SLabelEnd |
491 | 182 | ||
183 | instance SourceInfo (SExp' a) where | ||
184 | sourceInfo = \case | ||
185 | SGlobal (si, _) -> si | ||
186 | SBind si _ _ e1 e2 -> si | ||
187 | SApp si _ e1 e2 -> si | ||
188 | SLet _ e1 e2 -> sourceInfo e1 <> sourceInfo e2 | ||
189 | SVar (si, _) _ -> si | ||
190 | STyped si _ -> si | ||
191 | SLit si _ -> si | ||
192 | |||
193 | instance SetSourceInfo (SExp' a) where | ||
194 | setSI si = \case | ||
195 | SBind _ a b c d -> SBind si a b c d | ||
196 | SApp _ a b c -> SApp si a b c | ||
197 | SLet le a b -> SLet le a b | ||
198 | SVar (_, n) i -> SVar (si, n) i | ||
199 | STyped _ t -> STyped si t | ||
200 | SGlobal (_, n) -> SGlobal (si, n) | ||
201 | SLit _ l -> SLit si l | ||
202 | |||
492 | -------------------------------------------------------------------------------- low-level toolbox | 203 | -------------------------------------------------------------------------------- low-level toolbox |
493 | 204 | ||
494 | foldS g f i = \case | 205 | foldS g f i = \case |
@@ -767,6 +478,16 @@ getPVars_ = \case | |||
767 | getPPVars_ = \case | 478 | getPPVars_ = \case |
768 | ParPat pp -> foldMap getPVars_ pp | 479 | ParPat pp -> foldMap getPVars_ pp |
769 | 480 | ||
481 | instance SourceInfo ParPat where | ||
482 | sourceInfo (ParPat ps) = sourceInfo ps | ||
483 | |||
484 | instance SourceInfo Pat where | ||
485 | sourceInfo = \case | ||
486 | PVar (si,_) -> si | ||
487 | PCon (si,_) ps -> si <> sourceInfo ps | ||
488 | ViewPat e ps -> sourceInfo e <> sourceInfo ps | ||
489 | PatType ps e -> sourceInfo ps <> sourceInfo e | ||
490 | |||
770 | -------------------------------------------------------------------------------- pattern parsing | 491 | -------------------------------------------------------------------------------- pattern parsing |
771 | 492 | ||
772 | parsePat :: Prec -> P Pat | 493 | parsePat :: Prec -> P Pat |
@@ -1029,7 +750,7 @@ parseDef = | |||
1029 | [{-TypeAnn x $ addParamsS ts $ SType-}{-todo-}] | 750 | [{-TypeAnn x $ addParamsS ts $ SType-}{-todo-}] |
1030 | [FunAlt x (zip ts $ map PVar $ reverse nps) $ Right rhs] | 751 | [FunAlt x (zip ts $ map PVar $ reverse nps) $ Right rhs] |
1031 | <|> do try "typed ident" $ (\(vs, t) -> TypeAnn <$> vs <*> pure t) <$> typedIds Nothing | 752 | <|> do try "typed ident" $ (\(vs, t) -> TypeAnn <$> vs <*> pure t) <$> typedIds Nothing |
1032 | <|> parseFixityDecl | 753 | <|> map (uncurry PrecDef) <$> parseFixityDecl |
1033 | <|> pure <$> funAltDef varId | 754 | <|> pure <$> funAltDef varId |
1034 | <|> pure <$> valueDef | 755 | <|> pure <$> valueDef |
1035 | where | 756 | where |