diff options
Diffstat (limited to 'src/LambdaCube/Compiler/Lexer.hs')
-rw-r--r-- | src/LambdaCube/Compiler/Lexer.hs | 130 |
1 files changed, 56 insertions, 74 deletions
diff --git a/src/LambdaCube/Compiler/Lexer.hs b/src/LambdaCube/Compiler/Lexer.hs index dd61653c..1fc7ae79 100644 --- a/src/LambdaCube/Compiler/Lexer.hs +++ b/src/LambdaCube/Compiler/Lexer.hs | |||
@@ -1,4 +1,4 @@ | |||
1 | -- contains Haskell source code copied from Text.Parsec.Token, see below | 1 | -- contains modified Haskell source code copied from Text.Parsec.Token, see below |
2 | {-# LANGUAGE LambdaCase #-} | 2 | {-# LANGUAGE LambdaCase #-} |
3 | {-# LANGUAGE ViewPatterns #-} | 3 | {-# LANGUAGE ViewPatterns #-} |
4 | {-# LANGUAGE PatternSynonyms #-} | 4 | {-# LANGUAGE PatternSynonyms #-} |
@@ -29,7 +29,7 @@ import Control.DeepSeq | |||
29 | import Text.Parsec hiding (label, Empty, State, (<|>), many) | 29 | import Text.Parsec hiding (label, Empty, State, (<|>), many) |
30 | import qualified Text.Parsec as Pa | 30 | import qualified Text.Parsec as Pa |
31 | import qualified Text.Parsec.Token as Pa | 31 | import qualified Text.Parsec.Token as Pa |
32 | import Text.ParserCombinators.Parsec.Language (GenLanguageDef (..)) | 32 | import Text.ParserCombinators.Parsec.Language (GenLanguageDef)--hiding (identStart, identLetter, opStart, opLetter, reservedOpNames) |
33 | import qualified Text.ParserCombinators.Parsec.Language as Pa | 33 | import qualified Text.ParserCombinators.Parsec.Language as Pa |
34 | import Text.Parsec.Indentation hiding (Any) | 34 | import Text.Parsec.Indentation hiding (Any) |
35 | import qualified Text.Parsec.Indentation as Pa | 35 | import qualified Text.Parsec.Indentation as Pa |
@@ -73,12 +73,23 @@ namespace = asks snd | |||
73 | {-# INLINE languageDef #-} | 73 | {-# INLINE languageDef #-} |
74 | languageDef :: GenLanguageDef (IndentStream (CharIndentStream String)) SourcePos InnerP | 74 | languageDef :: GenLanguageDef (IndentStream (CharIndentStream String)) SourcePos InnerP |
75 | languageDef = Pa.haskellDef | 75 | languageDef = Pa.haskellDef |
76 | { Pa.identStart = indentStreamParser $ charIndentStreamParser $ letter <|> char '_' -- '_' is included also | 76 | { Pa.identStart = undefined |
77 | , Pa.identLetter = indentStreamParser $ charIndentStreamParser $ alphaNum <|> oneOf "_'#" | 77 | , Pa.identLetter = undefined |
78 | , Pa.opStart = indentStreamParser $ charIndentStreamParser $ oneOf ":!#$%&*+./<=>?@\\^|-~" | 78 | , Pa.opStart = undefined |
79 | , Pa.opLetter = indentStreamParser $ charIndentStreamParser $ oneOf ":!#$%&*+./<=>?@\\^|-~" | 79 | , Pa.opLetter = undefined |
80 | } | 80 | } |
81 | 81 | ||
82 | reservedNames = Pa.reservedNames languageDef | ||
83 | reservedOpNames = Pa.reservedOpNames languageDef | ||
84 | commentLine = Pa.commentLine languageDef | ||
85 | commentStart = Pa.commentStart languageDef | ||
86 | commentEnd = Pa.commentEnd languageDef | ||
87 | identStart = letter <|> char '_' -- '_' is included also | ||
88 | identLetter = alphaNum <|> oneOf "_'#" | ||
89 | opStart = oneOf ":!#$%&*+./<=>?@\\^|-~" | ||
90 | opStart' = oneOf "!#$%&*+./<=>?@\\^|-~" | ||
91 | opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~" | ||
92 | |||
82 | lexeme p = p <* (getPosition >>= setState >> whiteSpace) | 93 | lexeme p = p <* (getPosition >>= setState >> whiteSpace) |
83 | 94 | ||
84 | -------------------------------------------------------------------------------- names | 95 | -------------------------------------------------------------------------------- names |
@@ -202,25 +213,29 @@ switchNS = modifyLevel $ \case ExpLevel -> TypeLevel; TypeLevel -> ExpLevel | |||
202 | 213 | ||
203 | -------------------------------------------------------------------------------- identifiers | 214 | -------------------------------------------------------------------------------- identifiers |
204 | 215 | ||
205 | check msg p m = try_ msg $ mfilter p m | 216 | maybeStartWith p i = i <|> (:) <$> satisfy p <*> i |
217 | |||
218 | upperCase, lowerCase, symbols, colonSymbols, backquotedIdent :: P SName | ||
219 | |||
220 | upperCase = namespace >>= \ns -> tick ns <$> identifier_ (maybeStartWith (=='\'') $ if constructorNamespace ns then (:) <$> satisfy isUpper <*> many identLetter else ident) <?> "uppercase ident" | ||
221 | lowerCase = namespace >>= \ns -> identifier_ (if constructorNamespace ns then (:) <$> satisfy (\c -> isLower c || c == '_') <*> many identLetter else ident) <?> "lowercase ident" | ||
222 | backquotedIdent = lexeme $ try_ "backquoted ident" $ expect "reserved word" isReservedName $ char '`' *> ident <* char '`' | ||
223 | symbols = operator_ ((:) <$> opStart' <*> many opLetter) <?> "symbols" | ||
224 | colonSymbols = trCons <$> operator_ ((:) <$> satisfy (== ':') <*> many opLetter) <?> "op symbols" | ||
225 | where | ||
226 | trCons ":" = "Cons" | ||
227 | trCons x = x | ||
228 | |||
229 | expect msg p i = i >>= \n -> if (p n) then unexpected (msg ++ " " ++ show n) else return n | ||
206 | 230 | ||
207 | firstCaseChar ('\'': c: _) = c | ||
208 | firstCaseChar (c: _) = c | ||
209 | 231 | ||
210 | upperCase, lowerCase, symbols, colonSymbols :: P SName | 232 | ----------------- |
211 | --upperCase NonTypeNamespace = mzero -- todo | ||
212 | upperCase = namespace >>= \ns -> (if constructorNamespace ns then check "uppercase ident" (isUpper . firstCaseChar) else id) $ tick ns <$> (identifier <|> try_ "tick ident" (('\'':) <$ char '\'' <*> identifier)) | ||
213 | lowerCase = namespace >>= \ns -> (if constructorNamespace ns then check "lowercase ident" (isLower . firstCaseChar) else id) identifier | ||
214 | <|> try_ "underscore ident" (('_':) <$ char '_' <*> identifier) | ||
215 | symbols = check "symbols" ((/=':') . head) operator | ||
216 | colonSymbols = "Cons" <$ reservedOp ":" <|> check "symbols" ((==':') . head) operator | ||
217 | 233 | ||
218 | moduleName = {-qualified_ todo-} expNS upperCase | 234 | moduleName = {-qualified_ todo-} expNS upperCase |
219 | patVar = lowerCase <|> "" <$ reserved "_" | 235 | patVar = lowerCase <|> "" <$ reserved "_" |
220 | --qIdent = {-qualified_ todo-} (lowerCase <|> upperCase) | ||
221 | backquotedIdent = try_ "backquoted ident" $ lexeme $ char '`' *> ((:) <$> satisfy isAlpha <*> many (satisfy isAlphaNum)) <* char '`' | ||
222 | operatorT = symbols <|> colonSymbols <|> backquotedIdent | 236 | operatorT = symbols <|> colonSymbols <|> backquotedIdent |
223 | varId = lowerCase <|> parens operatorT | 237 | varId = lowerCase <|> parens operatorT |
238 | --qIdent = {-qualified_ todo-} (lowerCase <|> upperCase) | ||
224 | 239 | ||
225 | {- | 240 | {- |
226 | qualified_ id = do | 241 | qualified_ id = do |
@@ -289,7 +304,7 @@ getFixity (fm, _) n = fromMaybe (InfixL, 9) $ Map.lookup n fm | |||
289 | 304 | ||
290 | ---------------------------------------------------------------------- | 305 | ---------------------------------------------------------------------- |
291 | ---------------------------------------------------------------------- | 306 | ---------------------------------------------------------------------- |
292 | -- copied from | 307 | -- modified version of |
293 | -- | 308 | -- |
294 | -- Module : Text.Parsec.Token | 309 | -- Module : Text.Parsec.Token |
295 | -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 | 310 | -- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 |
@@ -497,10 +512,12 @@ number base baseDigit | |||
497 | reservedOp name = | 512 | reservedOp name = |
498 | lexeme $ try $ | 513 | lexeme $ try $ |
499 | do{ string name | 514 | do{ string name |
500 | ; notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name) | 515 | ; notFollowedBy opLetter <?> ("end of " ++ show name) |
501 | } | 516 | } |
502 | 517 | ||
503 | operator = | 518 | operator = operator_ oper |
519 | |||
520 | operator_ oper = | ||
504 | lexeme $ try $ | 521 | lexeme $ try $ |
505 | do{ name <- oper | 522 | do{ name <- oper |
506 | ; if (isReservedOp name) | 523 | ; if (isReservedOp name) |
@@ -509,39 +526,29 @@ operator = | |||
509 | } | 526 | } |
510 | 527 | ||
511 | oper = | 528 | oper = |
512 | do{ c <- (opStart languageDef) | 529 | do{ c <- opStart |
513 | ; cs <- many (opLetter languageDef) | 530 | ; cs <- many opLetter |
514 | ; return (c:cs) | 531 | ; return (c:cs) |
515 | } | 532 | } |
516 | <?> "operator" | 533 | <?> "operator" |
517 | 534 | ||
518 | isReservedOp name = | 535 | isReservedOp name = |
519 | isReserved (sort (reservedOpNames languageDef)) name | 536 | isReserved theReservedOpNames name |
520 | 537 | ||
538 | theReservedOpNames = sort reservedOpNames | ||
521 | 539 | ||
522 | ----------------------------------------------------------- | 540 | ----------------------------------------------------------- |
523 | -- Identifiers & Reserved words | 541 | -- Identifiers & Reserved words |
524 | ----------------------------------------------------------- | 542 | ----------------------------------------------------------- |
525 | reserved name = | 543 | reserved name = |
526 | lexeme $ try $ | 544 | lexeme $ try $ |
527 | do{ caseString name | 545 | do{ string name |
528 | ; notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name) | 546 | ; notFollowedBy identLetter <?> ("end of " ++ show name) |
529 | } | 547 | } |
530 | 548 | ||
531 | caseString name | 549 | identifier = identifier_ ident |
532 | | caseSensitive languageDef = string name | ||
533 | | otherwise = do{ walk name; return name } | ||
534 | where | ||
535 | walk [] = return () | ||
536 | walk (c:cs) = do{ caseChar c <?> msg; walk cs } | ||
537 | 550 | ||
538 | caseChar c | isAlpha c = char (toLower c) <|> char (toUpper c) | 551 | identifier_ ident = |
539 | | otherwise = char c | ||
540 | |||
541 | msg = show name | ||
542 | |||
543 | |||
544 | identifier = | ||
545 | lexeme $ try $ | 552 | lexeme $ try $ |
546 | do{ name <- ident | 553 | do{ name <- ident |
547 | ; if (isReservedName name) | 554 | ; if (isReservedName name) |
@@ -551,17 +558,14 @@ identifier = | |||
551 | 558 | ||
552 | 559 | ||
553 | ident | 560 | ident |
554 | = do{ c <- identStart languageDef | 561 | = do{ c <- identStart |
555 | ; cs <- many (identLetter languageDef) | 562 | ; cs <- many identLetter |
556 | ; return (c:cs) | 563 | ; return (c:cs) |
557 | } | 564 | } |
558 | <?> "identifier" | 565 | <?> "identifier" |
559 | 566 | ||
560 | isReservedName name | 567 | isReservedName name |
561 | = isReserved theReservedNames caseName | 568 | = isReserved theReservedNames name |
562 | where | ||
563 | caseName | caseSensitive languageDef = name | ||
564 | | otherwise = map toLower name | ||
565 | 569 | ||
566 | 570 | ||
567 | isReserved names name | 571 | isReserved names name |
@@ -573,11 +577,7 @@ isReserved names name | |||
573 | EQ -> True | 577 | EQ -> True |
574 | GT -> False | 578 | GT -> False |
575 | 579 | ||
576 | theReservedNames | 580 | theReservedNames = sort reservedNames |
577 | | caseSensitive languageDef = sort reserved | ||
578 | | otherwise = sort . map (map toLower) $ reserved | ||
579 | where | ||
580 | reserved = reservedNames languageDef | ||
581 | 581 | ||
582 | 582 | ||
583 | 583 | ||
@@ -588,46 +588,28 @@ symbol name | |||
588 | = lexeme (string name) | 588 | = lexeme (string name) |
589 | 589 | ||
590 | whiteSpace = ignoreAbsoluteIndentation (localTokenMode (const Pa.Any) whiteSpace') | 590 | whiteSpace = ignoreAbsoluteIndentation (localTokenMode (const Pa.Any) whiteSpace') |
591 | whiteSpace' | 591 | whiteSpace' = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment <?> "") |
592 | | noLine && noMulti = skipMany (simpleSpace <?> "") | ||
593 | | noLine = skipMany (simpleSpace <|> multiLineComment <?> "") | ||
594 | | noMulti = skipMany (simpleSpace <|> oneLineComment <?> "") | ||
595 | | otherwise = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment <?> "") | ||
596 | where | ||
597 | noLine = null (commentLine languageDef) | ||
598 | noMulti = null (commentStart languageDef) | ||
599 | 592 | ||
600 | simpleSpace = | 593 | simpleSpace = |
601 | skipMany1 (satisfy isSpace) | 594 | skipMany1 (satisfy isSpace) |
602 | 595 | ||
603 | oneLineComment = | 596 | oneLineComment = |
604 | do{ try (string (commentLine languageDef)) | 597 | do{ try (string commentLine) |
605 | ; skipMany (satisfy (/= '\n')) | 598 | ; skipMany (satisfy (/= '\n')) |
606 | ; return () | 599 | ; return () |
607 | } | 600 | } |
608 | 601 | ||
609 | multiLineComment = | 602 | multiLineComment = |
610 | do { try (string (commentStart languageDef)) | 603 | do { try (string commentStart) |
611 | ; inComment | 604 | ; inCommentMulti |
612 | } | 605 | } |
613 | 606 | ||
614 | inComment | ||
615 | | nestedComments languageDef = inCommentMulti | ||
616 | | otherwise = inCommentSingle | ||
617 | |||
618 | inCommentMulti | 607 | inCommentMulti |
619 | = do{ try (string (commentEnd languageDef)) ; return () } | 608 | = do{ try (string commentEnd) ; return () } |
620 | <|> do{ multiLineComment ; inCommentMulti } | 609 | <|> do{ multiLineComment ; inCommentMulti } |
621 | <|> do{ skipMany1 (noneOf startEnd) ; inCommentMulti } | 610 | <|> do{ skipMany1 (noneOf startEnd) ; inCommentMulti } |
622 | <|> do{ oneOf startEnd ; inCommentMulti } | 611 | <|> do{ oneOf startEnd ; inCommentMulti } |
623 | <?> "end of comment" | 612 | <?> "end of comment" |
624 | where | 613 | where |
625 | startEnd = nub (commentEnd languageDef ++ commentStart languageDef) | 614 | startEnd = nub (commentEnd ++ commentStart) |
626 | 615 | ||
627 | inCommentSingle | ||
628 | = do{ try (string (commentEnd languageDef)); return () } | ||
629 | <|> do{ skipMany1 (noneOf startEnd) ; inCommentSingle } | ||
630 | <|> do{ oneOf startEnd ; inCommentSingle } | ||
631 | <?> "end of comment" | ||
632 | where | ||
633 | startEnd = nub (commentEnd languageDef ++ commentStart languageDef) | ||