summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPéter Diviánszky <divipp@gmail.com>2016-01-27 16:58:48 +0100
committerPéter Diviánszky <divipp@gmail.com>2016-01-27 16:58:53 +0100
commit29146cae82240d1d21b2e63c72177c58d632b502 (patch)
tree96c49db8be1730a55e31042b202f0e2703aeddd8
parent5a5ddde0b4ef214989f2bfad74b094429da56d3a (diff)
Token.hs --> Lexer.hs
-rw-r--r--lambdacube-compiler.cabal2
-rw-r--r--src/LambdaCube/Compiler/Infer.hs1
-rw-r--r--src/LambdaCube/Compiler/Lexer.hs640
-rw-r--r--src/LambdaCube/Compiler/Parser.hs345
4 files changed, 675 insertions, 313 deletions
diff --git a/lambdacube-compiler.cabal b/lambdacube-compiler.cabal
index 7fcb617e..21411016 100644
--- a/lambdacube-compiler.cabal
+++ b/lambdacube-compiler.cabal
@@ -39,8 +39,8 @@ source-repository head
39library 39library
40 exposed-modules: 40 exposed-modules:
41 -- Compiler 41 -- Compiler
42 LambdaCube.Compiler.Token
43 LambdaCube.Compiler.Pretty 42 LambdaCube.Compiler.Pretty
43 LambdaCube.Compiler.Lexer
44 LambdaCube.Compiler.Parser 44 LambdaCube.Compiler.Parser
45 LambdaCube.Compiler.Infer 45 LambdaCube.Compiler.Infer
46 LambdaCube.Compiler.CoreToIR 46 LambdaCube.Compiler.CoreToIR
diff --git a/src/LambdaCube/Compiler/Infer.hs b/src/LambdaCube/Compiler/Infer.hs
index aa79354b..f7f5e809 100644
--- a/src/LambdaCube/Compiler/Infer.hs
+++ b/src/LambdaCube/Compiler/Infer.hs
@@ -37,6 +37,7 @@ import Control.Arrow hiding ((<+>))
37import Control.DeepSeq 37import Control.DeepSeq
38 38
39import LambdaCube.Compiler.Pretty hiding (Doc, braces, parens) 39import LambdaCube.Compiler.Pretty hiding (Doc, braces, parens)
40import LambdaCube.Compiler.Lexer
40import LambdaCube.Compiler.Parser 41import LambdaCube.Compiler.Parser
41 42
42-------------------------------------------------------------------------------- compiled expression representation 43-------------------------------------------------------------------------------- compiled expression representation
diff --git a/src/LambdaCube/Compiler/Lexer.hs b/src/LambdaCube/Compiler/Lexer.hs
new file mode 100644
index 00000000..dd61c5e0
--- /dev/null
+++ b/src/LambdaCube/Compiler/Lexer.hs
@@ -0,0 +1,640 @@
1-- contains Haskell source code copied from Text.Parsec.Token, see below
2{-# LANGUAGE LambdaCase #-}
3{-# LANGUAGE ViewPatterns #-}
4{-# LANGUAGE PatternSynonyms #-}
5{-# LANGUAGE FlexibleContexts #-}
6{-# LANGUAGE FlexibleInstances #-}
7{-# LANGUAGE NoMonomorphismRestriction #-}
8{-# LANGUAGE OverloadedStrings #-}
9{-# LANGUAGE ScopedTypeVariables #-}
10{-# OPTIONS_GHC -fno-warn-orphans #-} -- instance NFData SourcePos
11{-# OPTIONS -fno-warn-unused-do-bind -fno-warn-name-shadowing #-}
12module LambdaCube.Compiler.Lexer where
13
14import Data.Monoid
15import Data.Maybe
16import Data.List
17import Data.Char
18import Data.Set (Set)
19import qualified Data.Set as Set
20import qualified Data.Map as Map
21
22import Control.Monad.Except
23import Control.Monad.Reader
24import Control.Monad.Writer
25import Control.Arrow hiding ((<+>))
26import Control.Applicative
27import Control.DeepSeq
28
29import Text.Parsec hiding (label, Empty, State, (<|>), many)
30import qualified Text.Parsec as Pa
31import qualified Text.Parsec.Token as Pa
32import Text.ParserCombinators.Parsec.Language (GenLanguageDef (..))
33import qualified Text.ParserCombinators.Parsec.Language as Pa
34import Text.Parsec.Indentation hiding (Any)
35import qualified Text.Parsec.Indentation as Pa
36import Text.Parsec.Indentation.Char
37
38import LambdaCube.Compiler.Pretty hiding (Doc, braces, parens)
39
40-------------------------------------------------------------------------------- parser utils
41
42-- see http://blog.ezyang.com/2014/05/parsec-try-a-or-b-considered-harmful/comment-page-1/#comment-6602
43try_ s m = Pa.try m <?> s
44
45-- n, m >= 1, n < m
46manyNM n m p = do
47 xs <- many1 p
48 let lxs = length xs
49 unless (n <= lxs && lxs <= m) . fail $ unwords ["manyNM", show n, show m, "found", show lxs, "occurences."]
50 return xs
51
52-------------------------------------------------------------------------------- parser type
53
54type P = ParsecT (IndentStream (CharIndentStream String)) SourcePos InnerP
55type InnerP = WriterT [PostponedCheck] (Reader (DesugarInfo, Namespace))
56
57type PostponedCheck = Maybe String
58
59type DesugarInfo = (FixityMap, ConsMap)
60
61type ConsMap = Map.Map SName{-constructor name-}
62 (Either ((SName{-type name-}, Int{-num of indices-}), [(SName, Int)]{-constructors with arities-})
63 Int{-arity-})
64
65dsInfo :: P DesugarInfo
66dsInfo = asks fst
67
68namespace :: P Namespace
69namespace = asks snd
70
71-------------------------------------------------------------------------------- lexing
72
73{-# INLINE languageDef #-}
74languageDef :: GenLanguageDef (IndentStream (CharIndentStream String)) SourcePos InnerP
75languageDef = Pa.LanguageDef
76 { Pa.commentStart = Pa.commentStart Pa.haskellDef
77 , Pa.commentEnd = Pa.commentEnd Pa.haskellDef
78 , Pa.commentLine = Pa.commentLine Pa.haskellDef
79 , Pa.nestedComments = Pa.nestedComments Pa.haskellDef
80 , Pa.identStart = indentStreamParser $ charIndentStreamParser $ letter <|> char '_' -- '_' is included also
81 , Pa.identLetter = indentStreamParser $ charIndentStreamParser $ alphaNum <|> oneOf "_'#"
82 , Pa.opStart = indentStreamParser $ charIndentStreamParser $ oneOf ":!#$%&*+./<=>?@\\^|-~"
83 , Pa.opLetter = indentStreamParser $ charIndentStreamParser $ oneOf ":!#$%&*+./<=>?@\\^|-~"
84 , Pa.reservedOpNames = Pa.reservedOpNames Pa.haskellDef
85 , Pa.reservedNames = Pa.reservedNames Pa.haskellDef
86 , Pa.caseSensitive = Pa.caseSensitive Pa.haskellDef
87 }
88
89lexeme p = p <* (getPosition >>= setState >> whiteSpace)
90
91-------------------------------------------------------------------------------- names
92
93type SName = String
94
95caseName (c:cs) = toLower c: cs ++ "Case"
96
97pattern MatchName cs <- (getMatchName -> Just cs) where MatchName = matchName
98
99matchName cs = "match" ++ cs
100getMatchName ('m':'a':'t':'c':'h':cs) = Just cs
101getMatchName _ = Nothing
102
103
104-------------------------------------------------------------------------------- source infos
105
106instance NFData SourcePos where
107 rnf x = x `seq` ()
108
109data Range = Range SourcePos SourcePos
110 deriving (Eq, Ord)
111
112instance NFData Range where
113 rnf (Range a b) = rnf a `seq` rnf b `seq` ()
114
115instance PShow Range where
116 pShowPrec _ (Range b e) | sourceName b == sourceName e = text (sourceName b) <+> f b <> "-" <> f e
117 where
118 f x = pShow (sourceLine x) <> ":" <> pShow (sourceColumn x)
119
120joinRange :: Range -> Range -> Range
121joinRange (Range b e) (Range b' e') = Range (min b b') (max e e')
122
123data SI
124 = NoSI (Set String) -- no source info, attached debug info
125 | RangeSI Range
126
127instance Show SI where show _ = "SI"
128instance Eq SI where _ == _ = True
129instance Ord SI where _ `compare` _ = EQ
130
131instance Monoid SI where
132 mempty = NoSI Set.empty
133 mappend (RangeSI r1) (RangeSI r2) = RangeSI (joinRange r1 r2)
134 mappend (NoSI ds1) (NoSI ds2) = NoSI (ds1 `Set.union` ds2)
135 mappend r@RangeSI{} _ = r
136 mappend _ r@RangeSI{} = r
137
138instance PShow SI where
139 pShowPrec _ (NoSI ds) = hsep $ map pShow $ Set.toList ds
140 pShowPrec _ (RangeSI r) = pShow r
141
142showSI_ _ (NoSI ds) = unwords $ Set.toList ds
143showSI_ source (RangeSI (Range s e)) = show str
144 where
145 startLine = sourceLine s - 1
146 endline = sourceLine e - if sourceColumn e == 1 then 1 else 0
147 len = endline - startLine
148 str = vcat $ text (show s <> ":"){- <+> "-" <+> text (show e)-}:
149 map text (take len $ drop startLine $ lines source)
150 ++ [text $ replicate (sourceColumn s - 1) ' ' ++ replicate (sourceColumn e - sourceColumn s) '^' | len == 1]
151
152-- TODO: remove
153validSI RangeSI{} = True
154validSI _ = False
155
156debugSI a = NoSI (Set.singleton a)
157
158si@(RangeSI r) `validate` xs | all validSI xs && r `notElem` [r | RangeSI r <- xs] = si
159_ `validate` _ = mempty
160
161class SourceInfo si where
162 sourceInfo :: si -> SI
163
164instance SourceInfo SI where
165 sourceInfo = id
166
167instance SourceInfo si => SourceInfo [si] where
168 sourceInfo = foldMap sourceInfo
169
170class SetSourceInfo a where
171 setSI :: SI -> a -> a
172
173appRange :: P (SI -> a) -> P a
174appRange p = (\p1 a p2 -> a $ RangeSI $ Range p1 p2) <$> getPosition <*> p <*> getState
175
176withRange :: (SI -> a -> b) -> P a -> P b
177withRange f p = appRange $ flip f <$> p
178
179infix 9 `withRange`
180
181type SIName = (SI, SName)
182
183parseSIName :: P String -> P SIName
184parseSIName = withRange (,)
185
186-------------------------------------------------------------------------------- namespace handling
187
188data Level = TypeLevel | ExpLevel
189 deriving (Eq, Show)
190
191data Namespace = Namespace
192 { namespaceLevel :: Maybe Level
193 , constructorNamespace :: Bool -- True means that the case of the first letter of identifiers matters
194 }
195 deriving (Show)
196
197tick :: Namespace -> SName -> SName
198tick = (\case TypeLevel -> switchTick; _ -> id) . fromMaybe ExpLevel . namespaceLevel
199
200switchTick ('\'': n) = n
201switchTick n = '\'': n
202
203modifyLevel f = local $ second $ \(Namespace l p) -> Namespace (f <$> l) p
204
205typeNS, expNS, switchNS :: P a -> P a
206typeNS = modifyLevel $ const TypeLevel
207expNS = modifyLevel $ const ExpLevel
208switchNS = modifyLevel $ \case ExpLevel -> TypeLevel; TypeLevel -> ExpLevel
209
210-------------------------------------------------------------------------------- identifiers
211
212check msg p m = try_ msg $ mfilter p m
213
214firstCaseChar ('\'': c: _) = c
215firstCaseChar (c: _) = c
216
217upperCase, lowerCase, symbols, colonSymbols :: P SName
218--upperCase NonTypeNamespace = mzero -- todo
219upperCase = namespace >>= \ns -> (if constructorNamespace ns then check "uppercase ident" (isUpper . firstCaseChar) else id) $ tick ns <$> (identifier <|> try_ "tick ident" (('\'':) <$ char '\'' <*> identifier))
220lowerCase = namespace >>= \ns -> (if constructorNamespace ns then check "lowercase ident" (isLower . firstCaseChar) else id) identifier
221 <|> try_ "underscore ident" (('_':) <$ char '_' <*> identifier)
222symbols = check "symbols" ((/=':') . head) operator
223colonSymbols = "Cons" <$ reservedOp ":" <|> check "symbols" ((==':') . head) operator
224
225moduleName = {-qualified_ todo-} expNS upperCase
226patVar = lowerCase <|> "" <$ reserved "_"
227--qIdent = {-qualified_ todo-} (lowerCase <|> upperCase)
228backquotedIdent = try_ "backquoted ident" $ lexeme $ char '`' *> ((:) <$> satisfy isAlpha <*> many (satisfy isAlphaNum)) <* char '`'
229operatorT = symbols <|> colonSymbols <|> backquotedIdent
230varId = lowerCase <|> parens operatorT
231
232{-
233qualified_ id = do
234 q <- try_ "qualification" $ upperCase' <* dot
235 (N t qs n i) <- qualified_ id
236 return $ N t (q:qs) n i
237 <|>
238 id
239 where
240 upperCase' = (:) <$> satisfy isUpper <*> many (satisfy isAlphaNum)
241-}
242
243-------------------------------------------------------------------------------- fixity handling
244
245data FixityDef = Infix | InfixL | InfixR deriving (Show)
246type Fixity = (FixityDef, Int)
247type MFixity = Maybe Fixity
248type FixityMap = Map.Map SName Fixity
249
250calcPrec
251 :: (Show e, Show f)
252 => (f -> e -> e -> e)
253 -> (f -> Fixity)
254 -> e
255 -> [(f, e)]
256 -> e
257calcPrec app getFixity e = compileOps [((Infix, -1), undefined, e)]
258 where
259 compileOps [(_, _, e)] [] = e
260 compileOps acc [] = compileOps (shrink acc) []
261 compileOps acc@((p, g, e1): ee) es_@((op, e'): es) = case compareFixity (pr, op) (p, g) of
262 Right GT -> compileOps ((pr, op, e'): acc) es
263 Right LT -> compileOps (shrink acc) es_
264 Left err -> error err
265 where
266 pr = getFixity op
267
268 shrink ((_, op, e): (pr, op', e'): es) = (pr, op', app op e' e): es
269
270 compareFixity ((dir, i), op) ((dir', i'), op')
271 | i > i' = Right GT
272 | i < i' = Right LT
273 | otherwise = case (dir, dir') of
274 (InfixL, InfixL) -> Right LT
275 (InfixR, InfixR) -> Right GT
276 _ -> Left $ "fixity error:" ++ show (op, op')
277
278parseFixityDecl :: P [(SIName, Fixity)]
279parseFixityDecl = do
280 dir <- Infix <$ reserved "infix"
281 <|> InfixL <$ reserved "infixl"
282 <|> InfixR <$ reserved "infixr"
283 localIndentation Gt $ do
284 i <- fromIntegral <$> natural
285 ns <- commaSep1 (parseSIName operatorT)
286 return $ (,) <$> ns <*> pure (dir, i)
287
288getFixity :: DesugarInfo -> SName -> Fixity
289getFixity (fm, _) n = fromMaybe (InfixL, 9) $ Map.lookup n fm
290
291-------------------------------------------------------------------------------------------------
292
293
294
295
296
297----------------------------------------------------------------------
298----------------------------------------------------------------------
299-- copied from
300--
301-- Module : Text.Parsec.Token
302-- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007
303-- License : BSD-style
304
305-----------------------------------------------------------
306-- Bracketing
307-----------------------------------------------------------
308parens p = between (symbol "(") (symbol ")") p
309braces p = between (symbol "{") (symbol "}") p
310angles p = between (symbol "<") (symbol ">") p
311brackets p = between (symbol "[") (symbol "]") p
312
313semi = symbol ";"
314comma = symbol ","
315dot = symbol "."
316colon = symbol ":"
317
318commaSep p = sepBy p comma
319semiSep p = sepBy p semi
320
321commaSep1 p = sepBy1 p comma
322semiSep1 p = sepBy1 p semi
323
324
325-----------------------------------------------------------
326-- Chars & Strings
327-----------------------------------------------------------
328charLiteral = lexeme (between (char '\'')
329 (char '\'' <?> "end of character")
330 characterChar )
331 <?> "character"
332
333characterChar = charLetter <|> charEscape
334 <?> "literal character"
335
336charEscape = do{ char '\\'; escapeCode }
337charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
338
339
340
341stringLiteral = lexeme (
342 do{ str <- between (char '"')
343 (localTokenMode (const Pa.Any) (char '"' <?> "end of string"))
344 (localTokenMode (const Pa.Any) (many stringChar))
345 ; return (foldr (maybe id (:)) "" str)
346 }
347 <?> "literal string")
348
349stringChar = do{ c <- stringLetter; return (Just c) }
350 <|> stringEscape
351 <?> "string character"
352
353stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
354
355stringEscape = do{ char '\\'
356 ; do{ escapeGap ; return Nothing }
357 <|> do{ escapeEmpty; return Nothing }
358 <|> do{ esc <- escapeCode; return (Just esc) }
359 }
360
361escapeEmpty = char '&'
362escapeGap = do{ many1 space
363 ; char '\\' <?> "end of string gap"
364 }
365
366
367
368-- escape codes
369escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
370 <?> "escape code"
371
372charControl = do{ char '^'
373 ; code <- upper
374 ; return (toEnum (fromEnum code - fromEnum 'A'))
375 }
376
377charNum = do{ code <- decimal
378 <|> do{ char 'o'; number 8 octDigit }
379 <|> do{ char 'x'; number 16 hexDigit }
380 ; return (toEnum (fromInteger code))
381 }
382
383charEsc = choice (map parseEsc escMap)
384 where
385 parseEsc (c,code) = do{ char c; return code }
386
387charAscii = choice (map parseAscii asciiMap)
388 where
389 parseAscii (asc,code) = try (do{ string asc; return code })
390
391
392-- escape code tables
393escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'")
394asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
395
396ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
397 "FS","GS","RS","US","SP"]
398ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
399 "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
400 "CAN","SUB","ESC","DEL"]
401
402ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI',
403 '\EM','\FS','\GS','\RS','\US','\SP']
404ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK',
405 '\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK',
406 '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL']
407
408
409-----------------------------------------------------------
410-- Numbers
411-----------------------------------------------------------
412naturalOrFloat = lexeme (natFloat) <?> "number"
413
414float = lexeme floating <?> "float"
415integer = lexeme int <?> "integer"
416natural = lexeme nat <?> "natural"
417
418
419-- floats
420floating = do{ n <- decimal
421 ; fractExponent n
422 }
423
424
425natFloat = do{ char '0'
426 ; zeroNumFloat
427 }
428 <|> decimalFloat
429
430zeroNumFloat = do{ n <- hexadecimal <|> octal
431 ; return (Left n)
432 }
433 <|> decimalFloat
434 <|> fractFloat 0
435 <|> return (Left 0)
436
437decimalFloat = do{ n <- decimal
438 ; option (Left n)
439 (fractFloat n)
440 }
441
442fractFloat n = do{ f <- fractExponent n
443 ; return (Right f)
444 }
445
446fractExponent n = do{ fract <- fraction
447 ; expo <- option 1.0 exponent'
448 ; return ((fromInteger n + fract)*expo)
449 }
450 <|>
451 do{ expo <- exponent'
452 ; return ((fromInteger n)*expo)
453 }
454
455fraction = do{ char '.'
456 ; digits <- many1 digit <?> "fraction"
457 ; return (foldr op 0.0 digits)
458 }
459 <?> "fraction"
460 where
461 op d f = (f + fromIntegral (digitToInt d))/10.0
462
463exponent' = do{ oneOf "eE"
464 ; f <- sign
465 ; e <- decimal <?> "exponent"
466 ; return (power (f e))
467 }
468 <?> "exponent"
469 where
470 power e | e < 0 = 1.0/power(-e)
471 | otherwise = fromInteger (10^e)
472
473
474-- integers and naturals
475int = do{ f <- lexeme sign
476 ; n <- nat
477 ; return (f n)
478 }
479
480sign = (char '-' >> return negate)
481 <|> (char '+' >> return id)
482 <|> return id
483
484nat = zeroNumber <|> decimal
485
486zeroNumber = do{ char '0'
487 ; hexadecimal <|> octal <|> decimal <|> return 0
488 }
489 <?> ""
490
491decimal = number 10 digit
492hexadecimal = do{ oneOf "xX"; number 16 hexDigit }
493octal = do{ oneOf "oO"; number 8 octDigit }
494
495number base baseDigit
496 = do{ digits <- many1 baseDigit
497 ; let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits
498 ; seq n (return n)
499 }
500
501-----------------------------------------------------------
502-- Operators & reserved ops
503-----------------------------------------------------------
504reservedOp name =
505 lexeme $ try $
506 do{ string name
507 ; notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)
508 }
509
510operator =
511 lexeme $ try $
512 do{ name <- oper
513 ; if (isReservedOp name)
514 then unexpected ("reserved operator " ++ show name)
515 else return name
516 }
517
518oper =
519 do{ c <- (opStart languageDef)
520 ; cs <- many (opLetter languageDef)
521 ; return (c:cs)
522 }
523 <?> "operator"
524
525isReservedOp name =
526 isReserved (sort (reservedOpNames languageDef)) name
527
528
529-----------------------------------------------------------
530-- Identifiers & Reserved words
531-----------------------------------------------------------
532reserved name =
533 lexeme $ try $
534 do{ caseString name
535 ; notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)
536 }
537
538caseString name
539 | caseSensitive languageDef = string name
540 | otherwise = do{ walk name; return name }
541 where
542 walk [] = return ()
543 walk (c:cs) = do{ caseChar c <?> msg; walk cs }
544
545 caseChar c | isAlpha c = char (toLower c) <|> char (toUpper c)
546 | otherwise = char c
547
548 msg = show name
549
550
551identifier =
552 lexeme $ try $
553 do{ name <- ident
554 ; if (isReservedName name)
555 then unexpected ("reserved word " ++ show name)
556 else return name
557 }
558
559
560ident
561 = do{ c <- identStart languageDef
562 ; cs <- many (identLetter languageDef)
563 ; return (c:cs)
564 }
565 <?> "identifier"
566
567isReservedName name
568 = isReserved theReservedNames caseName
569 where
570 caseName | caseSensitive languageDef = name
571 | otherwise = map toLower name
572
573
574isReserved names name
575 = scan names
576 where
577 scan [] = False
578 scan (r:rs) = case (compare r name) of
579 LT -> scan rs
580 EQ -> True
581 GT -> False
582
583theReservedNames
584 | caseSensitive languageDef = sort reserved
585 | otherwise = sort . map (map toLower) $ reserved
586 where
587 reserved = reservedNames languageDef
588
589
590
591-----------------------------------------------------------
592-- White space & symbols
593-----------------------------------------------------------
594symbol name
595 = lexeme (string name)
596
597whiteSpace = ignoreAbsoluteIndentation (localTokenMode (const Pa.Any) whiteSpace')
598whiteSpace'
599 | noLine && noMulti = skipMany (simpleSpace <?> "")
600 | noLine = skipMany (simpleSpace <|> multiLineComment <?> "")
601 | noMulti = skipMany (simpleSpace <|> oneLineComment <?> "")
602 | otherwise = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment <?> "")
603 where
604 noLine = null (commentLine languageDef)
605 noMulti = null (commentStart languageDef)
606
607simpleSpace =
608 skipMany1 (satisfy isSpace)
609
610oneLineComment =
611 do{ try (string (commentLine languageDef))
612 ; skipMany (satisfy (/= '\n'))
613 ; return ()
614 }
615
616multiLineComment =
617 do { try (string (commentStart languageDef))
618 ; inComment
619 }
620
621inComment
622 | nestedComments languageDef = inCommentMulti
623 | otherwise = inCommentSingle
624
625inCommentMulti
626 = do{ try (string (commentEnd languageDef)) ; return () }
627 <|> do{ multiLineComment ; inCommentMulti }
628 <|> do{ skipMany1 (noneOf startEnd) ; inCommentMulti }
629 <|> do{ oneOf startEnd ; inCommentMulti }
630 <?> "end of comment"
631 where
632 startEnd = nub (commentEnd languageDef ++ commentStart languageDef)
633
634inCommentSingle
635 = do{ try (string (commentEnd languageDef)); return () }
636 <|> do{ skipMany1 (noneOf startEnd) ; inCommentSingle }
637 <|> do{ oneOf startEnd ; inCommentSingle }
638 <?> "end of comment"
639 where
640 startEnd = nub (commentEnd languageDef ++ commentStart languageDef)
diff --git a/src/LambdaCube/Compiler/Parser.hs b/src/LambdaCube/Compiler/Parser.hs
index 2d7eb49f..31012fdd 100644
--- a/src/LambdaCube/Compiler/Parser.hs
+++ b/src/LambdaCube/Compiler/Parser.hs
@@ -7,8 +7,6 @@
7{-# LANGUAGE OverloadedStrings #-} 7{-# LANGUAGE OverloadedStrings #-}
8{-# LANGUAGE DeriveFunctor #-} 8{-# LANGUAGE DeriveFunctor #-}
9{-# LANGUAGE ScopedTypeVariables #-} 9{-# LANGUAGE ScopedTypeVariables #-}
10{-# OPTIONS_GHC -fno-warn-orphans #-} -- instance NFData SourcePos
11-- {-# OPTIONS_GHC -O0 #-}
12module LambdaCube.Compiler.Parser 10module LambdaCube.Compiler.Parser
13 {- todo ( definitions 11 {- todo ( definitions
14 , extensions 12 , extensions
@@ -28,8 +26,6 @@ import Data.Maybe
28import Data.List 26import Data.List
29import Data.Char 27import Data.Char
30import Data.String 28import Data.String
31import Data.Set (Set)
32import qualified Data.Set as Set
33import qualified Data.Map as Map 29import qualified Data.Map as Map
34 30
35import Control.Monad.Except 31import Control.Monad.Except
@@ -38,19 +34,16 @@ import Control.Monad.Writer
38import Control.Monad.State 34import Control.Monad.State
39import Control.Arrow hiding ((<+>)) 35import Control.Arrow hiding ((<+>))
40import Control.Applicative 36import Control.Applicative
41import Control.DeepSeq
42 37
43import Text.Parsec hiding (label, Empty, State, (<|>), many, try) 38import Text.Parsec hiding (label, Empty, State, (<|>), many, try)
44import qualified Text.Parsec as Pa 39import qualified Text.Parsec as Pa
45import qualified Text.Parsec.Token as Pa
46import qualified Text.ParserCombinators.Parsec.Language as Pa
47import Text.Parsec.Pos 40import Text.Parsec.Pos
48import Text.Parsec.Indentation hiding (Any) 41import Text.Parsec.Indentation hiding (Any)
49import Text.Parsec.Indentation.Char 42import Text.Parsec.Indentation.Char
50 43
51import qualified LambdaCube.Compiler.Pretty as P 44import qualified LambdaCube.Compiler.Pretty as P
52import LambdaCube.Compiler.Pretty hiding (Doc, braces, parens) 45import LambdaCube.Compiler.Pretty hiding (Doc, braces, parens)
53import LambdaCube.Compiler.Token 46import LambdaCube.Compiler.Lexer
54 47
55-------------------------------------------------------------------------------- utils 48-------------------------------------------------------------------------------- utils
56 49
@@ -78,309 +71,7 @@ traceD x = if debug then trace_ x else id
78 71
79debug = False--True--tr 72debug = False--True--tr
80 73
81-------------------------------------------------------------------------------- parser utils 74try = try_
82
83-- see http://blog.ezyang.com/2014/05/parsec-try-a-or-b-considered-harmful/comment-page-1/#comment-6602
84try s m = Pa.try m <?> s
85
86-- n, m >= 1, n < m
87manyNM n m p = do
88 xs <- many1 p
89 let lxs = length xs
90 unless (n <= lxs && lxs <= m) . fail $ unwords ["manyNM", show n, show m, "found", show lxs, "occurences."]
91 return xs
92
93-------------------------------------------------------------------------------- parser type
94
95type P = ParsecT (IndentStream (CharIndentStream String)) SourcePos InnerP
96type InnerP = WriterT [PostponedCheck] (Reader (DesugarInfo, Namespace))
97
98type PostponedCheck = Maybe String
99
100type DesugarInfo = (FixityMap, ConsMap)
101
102type ConsMap = Map.Map SName{-constructor name-}
103 (Either ((SName{-type name-}, Int{-num of indices-}), [(SName, Int)]{-constructors with arities-})
104 Int{-arity-})
105
106dsInfo :: P DesugarInfo
107dsInfo = asks fst
108
109namespace :: P Namespace
110namespace = asks snd
111
112-------------------------------------------------------------------------------- lexing
113
114{-# NOINLINE lexer #-}
115lexer :: Pa.GenTokenParser (IndentStream (CharIndentStream String)) SourcePos InnerP
116lexer = makeTokenParser lexeme $ makeIndentLanguageDef style
117 where
118 style = Pa.LanguageDef
119 { Pa.commentStart = Pa.commentStart Pa.haskellDef
120 , Pa.commentEnd = Pa.commentEnd Pa.haskellDef
121 , Pa.commentLine = Pa.commentLine Pa.haskellDef
122 , Pa.nestedComments = Pa.nestedComments Pa.haskellDef
123 , Pa.identStart = letter <|> char '_' -- '_' is included also
124 , Pa.identLetter = alphaNum <|> oneOf "_'#"
125 , Pa.opStart = Pa.opLetter style
126 , Pa.opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
127 , Pa.reservedOpNames = Pa.reservedOpNames Pa.haskellDef
128 , Pa.reservedNames = Pa.reservedNames Pa.haskellDef
129 , Pa.caseSensitive = Pa.caseSensitive Pa.haskellDef
130 }
131
132lexeme p = p <* (getPosition >>= setState >> whiteSpace)
133
134parens = Pa.parens lexer
135braces = Pa.braces lexer
136brackets = Pa.brackets lexer
137commaSep = Pa.commaSep lexer
138commaSep1 = Pa.commaSep1 lexer
139--dot = Pa.dot lexer
140--comma = Pa.comma lexer
141colon = Pa.colon lexer
142natural = Pa.natural lexer
143--integer = Pa.integer lexer
144float = Pa.float lexer
145charLiteral = Pa.charLiteral lexer
146stringLiteral = Pa.stringLiteral lexer
147whiteSpace = Pa.whiteSpace lexer
148operator = Pa.operator lexer
149reserved = Pa.reserved lexer
150reservedOp = Pa.reservedOp lexer
151identifier = Pa.identifier lexer
152
153
154-------------------------------------------------------------------------------- names
155
156type SName = String
157
158caseName (c:cs) = toLower c: cs ++ "Case"
159
160pattern MatchName cs <- (getMatchName -> Just cs) where MatchName = matchName
161
162matchName cs = "match" ++ cs
163getMatchName ('m':'a':'t':'c':'h':cs) = Just cs
164getMatchName _ = Nothing
165
166
167-------------------------------------------------------------------------------- source infos
168
169instance NFData SourcePos where
170 rnf x = x `seq` ()
171
172data Range = Range SourcePos SourcePos
173 deriving (Eq, Ord)
174
175instance NFData Range where
176 rnf (Range a b) = rnf a `seq` rnf b `seq` ()
177
178instance PShow Range where
179 pShowPrec _ (Range b e) | sourceName b == sourceName e = text (sourceName b) <+> f b <> "-" <> f e
180 where
181 f x = pShow (sourceLine x) <> ":" <> pShow (sourceColumn x)
182
183joinRange :: Range -> Range -> Range
184joinRange (Range b e) (Range b' e') = Range (min b b') (max e e')
185
186data SI
187 = NoSI (Set String) -- no source info, attached debug info
188 | RangeSI Range
189
190instance Show SI where show _ = "SI"
191instance Eq SI where _ == _ = True
192instance Ord SI where _ `compare` _ = EQ
193
194instance Monoid SI where
195 mempty = NoSI Set.empty
196 mappend (RangeSI r1) (RangeSI r2) = RangeSI (joinRange r1 r2)
197 mappend (NoSI ds1) (NoSI ds2) = NoSI (ds1 `Set.union` ds2)
198 mappend r@RangeSI{} _ = r
199 mappend _ r@RangeSI{} = r
200
201instance PShow SI where
202 pShowPrec _ (NoSI ds) = hsep $ map pShow $ Set.toList ds
203 pShowPrec _ (RangeSI r) = pShow r
204
205showSI_ _ (NoSI ds) = unwords $ Set.toList ds
206showSI_ source (RangeSI (Range s e)) = show str
207 where
208 startLine = sourceLine s - 1
209 endline = sourceLine e - if sourceColumn e == 1 then 1 else 0
210 len = endline - startLine
211 str = vcat $ text (show s <> ":"){- <+> "-" <+> text (show e)-}:
212 map text (take len $ drop startLine $ lines source)
213 ++ [text $ replicate (sourceColumn s - 1) ' ' ++ replicate (sourceColumn e - sourceColumn s) '^' | len == 1]
214
215-- TODO: remove
216validSI RangeSI{} = True
217validSI _ = False
218
219debugSI a = NoSI (Set.singleton a)
220
221si@(RangeSI r) `validate` xs | all validSI xs && r `notElem` [r | RangeSI r <- xs] = si
222_ `validate` _ = mempty
223
224class SourceInfo si where
225 sourceInfo :: si -> SI
226
227instance SourceInfo SI where
228 sourceInfo = id
229
230instance SourceInfo si => SourceInfo [si] where
231 sourceInfo = foldMap sourceInfo
232
233instance SourceInfo ParPat where
234 sourceInfo (ParPat ps) = sourceInfo ps
235
236instance SourceInfo Pat where
237 sourceInfo = \case
238 PVar (si,_) -> si
239 PCon (si,_) ps -> si <> sourceInfo ps
240 ViewPat e ps -> sourceInfo e <> sourceInfo ps
241 PatType ps e -> sourceInfo ps <> sourceInfo e
242
243instance SourceInfo (SExp' a) where
244 sourceInfo = \case
245 SGlobal (si, _) -> si
246 SBind si _ _ e1 e2 -> si
247 SApp si _ e1 e2 -> si
248 SLet _ e1 e2 -> sourceInfo e1 <> sourceInfo e2
249 SVar (si, _) _ -> si
250 STyped si _ -> si
251 SLit si _ -> si
252
253class SetSourceInfo a where
254 setSI :: SI -> a -> a
255
256instance SetSourceInfo (SExp' a) where
257 setSI si = \case
258 SBind _ a b c d -> SBind si a b c d
259 SApp _ a b c -> SApp si a b c
260 SLet le a b -> SLet le a b
261 SVar (_, n) i -> SVar (si, n) i
262 STyped _ t -> STyped si t
263 SGlobal (_, n) -> SGlobal (si, n)
264 SLit _ l -> SLit si l
265
266appRange :: P (SI -> a) -> P a
267appRange p = (\p1 a p2 -> a $ RangeSI $ Range p1 p2) <$> getPosition <*> p <*> getState
268
269withRange :: (SI -> a -> b) -> P a -> P b
270withRange f p = appRange $ flip f <$> p
271
272infix 9 `withRange`
273
274type SIName = (SI, SName)
275
276parseSIName :: P String -> P SIName
277parseSIName = withRange (,)
278
279-------------------------------------------------------------------------------- namespace handling
280
281data Level = TypeLevel | ExpLevel
282 deriving (Eq, Show)
283
284data Namespace = Namespace
285 { namespaceLevel :: Maybe Level
286 , constructorNamespace :: Bool -- True means that the case of the first letter of identifiers matters
287 }
288 deriving (Show)
289
290tick :: Namespace -> SName -> SName
291tick = (\case TypeLevel -> switchTick; _ -> id) . fromMaybe ExpLevel . namespaceLevel
292
293switchTick ('\'': n) = n
294switchTick n = '\'': n
295
296modifyLevel f = local $ second $ \(Namespace l p) -> Namespace (f <$> l) p
297
298typeNS, expNS, switchNS :: P a -> P a
299typeNS = modifyLevel $ const TypeLevel
300expNS = modifyLevel $ const ExpLevel
301switchNS = modifyLevel $ \case ExpLevel -> TypeLevel; TypeLevel -> ExpLevel
302
303-------------------------------------------------------------------------------- identifiers
304
305check msg p m = try msg $ mfilter p m
306
307firstCaseChar ('\'': c: _) = c
308firstCaseChar (c: _) = c
309
310upperCase, lowerCase, symbols, colonSymbols :: P SName
311--upperCase NonTypeNamespace = mzero -- todo
312upperCase = namespace >>= \ns -> (if constructorNamespace ns then check "uppercase ident" (isUpper . firstCaseChar) else id) $ tick ns <$> (identifier <|> try "tick ident" (('\'':) <$ char '\'' <*> identifier))
313lowerCase = namespace >>= \ns -> (if constructorNamespace ns then check "lowercase ident" (isLower . firstCaseChar) else id) identifier
314 <|> try "underscore ident" (('_':) <$ char '_' <*> identifier)
315symbols = check "symbols" ((/=':') . head) operator
316colonSymbols = "Cons" <$ reservedOp ":" <|> check "symbols" ((==':') . head) operator
317
318moduleName = {-qualified_ todo-} expNS upperCase
319patVar = lowerCase <|> "" <$ reserved "_"
320--qIdent = {-qualified_ todo-} (lowerCase <|> upperCase)
321backquotedIdent = try "backquoted ident" $ lexeme $ char '`' *> ((:) <$> satisfy isAlpha <*> many (satisfy isAlphaNum)) <* char '`'
322operatorT = symbols <|> colonSymbols <|> backquotedIdent
323varId = lowerCase <|> parens operatorT
324
325{-
326qualified_ id = do
327 q <- try "qualification" $ upperCase' <* dot
328 (N t qs n i) <- qualified_ id
329 return $ N t (q:qs) n i
330 <|>
331 id
332 where
333 upperCase' = (:) <$> satisfy isUpper <*> many (satisfy isAlphaNum)
334-}
335
336-------------------------------------------------------------------------------- fixity handling
337
338data FixityDef = Infix | InfixL | InfixR deriving (Show)
339type Fixity = (FixityDef, Int)
340type MFixity = Maybe Fixity
341type FixityMap = Map.Map SName Fixity
342
343calcPrec
344 :: (Show e, Show f)
345 => (f -> e -> e -> e)
346 -> (f -> Fixity)
347 -> e
348 -> [(f, e)]
349 -> e
350calcPrec app getFixity e = compileOps [((Infix, -1), undefined, e)]
351 where
352 compileOps [(_, _, e)] [] = e
353 compileOps acc [] = compileOps (shrink acc) []
354 compileOps acc@((p, g, e1): ee) es_@((op, e'): es) = case compareFixity (pr, op) (p, g) of
355 Right GT -> compileOps ((pr, op, e'): acc) es
356 Right LT -> compileOps (shrink acc) es_
357 Left err -> error err
358 where
359 pr = getFixity op
360
361 shrink ((_, op, e): (pr, op', e'): es) = (pr, op', app op e' e): es
362
363 compareFixity ((dir, i), op) ((dir', i'), op')
364 | i > i' = Right GT
365 | i < i' = Right LT
366 | otherwise = case (dir, dir') of
367 (InfixL, InfixL) -> Right LT
368 (InfixR, InfixR) -> Right GT
369 _ -> Left $ "fixity error:" ++ show (op, op')
370
371parseFixityDecl :: P [Stmt]
372parseFixityDecl = do
373 dir <- Infix <$ reserved "infix"
374 <|> InfixL <$ reserved "infixl"
375 <|> InfixR <$ reserved "infixr"
376 localIndentation Gt $ do
377 i <- fromIntegral <$> natural
378 ns <- commaSep1 (parseSIName operatorT)
379 return $ PrecDef <$> ns <*> pure (dir, i)
380
381getFixity :: DesugarInfo -> SName -> Fixity
382getFixity (fm, _) n = fromMaybe (InfixL, 9) $ Map.lookup n fm
383
384 75
385-------------------------------------------------------------------------------- literals 76-------------------------------------------------------------------------------- literals
386 77
@@ -489,6 +180,26 @@ downToS n m = map (SVar (debugSI "20", ".ds")) [n+m-1, n+m-2..n]
489 180
490xSLabelEnd = id --SLabelEnd 181xSLabelEnd = id --SLabelEnd
491 182
183instance SourceInfo (SExp' a) where
184 sourceInfo = \case
185 SGlobal (si, _) -> si
186 SBind si _ _ e1 e2 -> si
187 SApp si _ e1 e2 -> si
188 SLet _ e1 e2 -> sourceInfo e1 <> sourceInfo e2
189 SVar (si, _) _ -> si
190 STyped si _ -> si
191 SLit si _ -> si
192
193instance SetSourceInfo (SExp' a) where
194 setSI si = \case
195 SBind _ a b c d -> SBind si a b c d
196 SApp _ a b c -> SApp si a b c
197 SLet le a b -> SLet le a b
198 SVar (_, n) i -> SVar (si, n) i
199 STyped _ t -> STyped si t
200 SGlobal (_, n) -> SGlobal (si, n)
201 SLit _ l -> SLit si l
202
492-------------------------------------------------------------------------------- low-level toolbox 203-------------------------------------------------------------------------------- low-level toolbox
493 204
494foldS g f i = \case 205foldS g f i = \case
@@ -767,6 +478,16 @@ getPVars_ = \case
767getPPVars_ = \case 478getPPVars_ = \case
768 ParPat pp -> foldMap getPVars_ pp 479 ParPat pp -> foldMap getPVars_ pp
769 480
481instance SourceInfo ParPat where
482 sourceInfo (ParPat ps) = sourceInfo ps
483
484instance SourceInfo Pat where
485 sourceInfo = \case
486 PVar (si,_) -> si
487 PCon (si,_) ps -> si <> sourceInfo ps
488 ViewPat e ps -> sourceInfo e <> sourceInfo ps
489 PatType ps e -> sourceInfo ps <> sourceInfo e
490
770-------------------------------------------------------------------------------- pattern parsing 491-------------------------------------------------------------------------------- pattern parsing
771 492
772parsePat :: Prec -> P Pat 493parsePat :: Prec -> P Pat
@@ -1029,7 +750,7 @@ parseDef =
1029 [{-TypeAnn x $ addParamsS ts $ SType-}{-todo-}] 750 [{-TypeAnn x $ addParamsS ts $ SType-}{-todo-}]
1030 [FunAlt x (zip ts $ map PVar $ reverse nps) $ Right rhs] 751 [FunAlt x (zip ts $ map PVar $ reverse nps) $ Right rhs]
1031 <|> do try "typed ident" $ (\(vs, t) -> TypeAnn <$> vs <*> pure t) <$> typedIds Nothing 752 <|> do try "typed ident" $ (\(vs, t) -> TypeAnn <$> vs <*> pure t) <$> typedIds Nothing
1032 <|> parseFixityDecl 753 <|> map (uncurry PrecDef) <$> parseFixityDecl
1033 <|> pure <$> funAltDef varId 754 <|> pure <$> funAltDef varId
1034 <|> pure <$> valueDef 755 <|> pure <$> valueDef
1035 where 756 where