summaryrefslogtreecommitdiff
path: root/src/LambdaCube/Compiler/Lexer.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/LambdaCube/Compiler/Lexer.hs')
-rw-r--r--src/LambdaCube/Compiler/Lexer.hs130
1 files changed, 56 insertions, 74 deletions
diff --git a/src/LambdaCube/Compiler/Lexer.hs b/src/LambdaCube/Compiler/Lexer.hs
index dd61653c..1fc7ae79 100644
--- a/src/LambdaCube/Compiler/Lexer.hs
+++ b/src/LambdaCube/Compiler/Lexer.hs
@@ -1,4 +1,4 @@
1-- contains Haskell source code copied from Text.Parsec.Token, see below 1-- contains modified Haskell source code copied from Text.Parsec.Token, see below
2{-# LANGUAGE LambdaCase #-} 2{-# LANGUAGE LambdaCase #-}
3{-# LANGUAGE ViewPatterns #-} 3{-# LANGUAGE ViewPatterns #-}
4{-# LANGUAGE PatternSynonyms #-} 4{-# LANGUAGE PatternSynonyms #-}
@@ -29,7 +29,7 @@ import Control.DeepSeq
29import Text.Parsec hiding (label, Empty, State, (<|>), many) 29import Text.Parsec hiding (label, Empty, State, (<|>), many)
30import qualified Text.Parsec as Pa 30import qualified Text.Parsec as Pa
31import qualified Text.Parsec.Token as Pa 31import qualified Text.Parsec.Token as Pa
32import Text.ParserCombinators.Parsec.Language (GenLanguageDef (..)) 32import Text.ParserCombinators.Parsec.Language (GenLanguageDef)--hiding (identStart, identLetter, opStart, opLetter, reservedOpNames)
33import qualified Text.ParserCombinators.Parsec.Language as Pa 33import qualified Text.ParserCombinators.Parsec.Language as Pa
34import Text.Parsec.Indentation hiding (Any) 34import Text.Parsec.Indentation hiding (Any)
35import qualified Text.Parsec.Indentation as Pa 35import qualified Text.Parsec.Indentation as Pa
@@ -73,12 +73,23 @@ namespace = asks snd
73{-# INLINE languageDef #-} 73{-# INLINE languageDef #-}
74languageDef :: GenLanguageDef (IndentStream (CharIndentStream String)) SourcePos InnerP 74languageDef :: GenLanguageDef (IndentStream (CharIndentStream String)) SourcePos InnerP
75languageDef = Pa.haskellDef 75languageDef = Pa.haskellDef
76 { Pa.identStart = indentStreamParser $ charIndentStreamParser $ letter <|> char '_' -- '_' is included also 76 { Pa.identStart = undefined
77 , Pa.identLetter = indentStreamParser $ charIndentStreamParser $ alphaNum <|> oneOf "_'#" 77 , Pa.identLetter = undefined
78 , Pa.opStart = indentStreamParser $ charIndentStreamParser $ oneOf ":!#$%&*+./<=>?@\\^|-~" 78 , Pa.opStart = undefined
79 , Pa.opLetter = indentStreamParser $ charIndentStreamParser $ oneOf ":!#$%&*+./<=>?@\\^|-~" 79 , Pa.opLetter = undefined
80 } 80 }
81 81
82reservedNames = Pa.reservedNames languageDef
83reservedOpNames = Pa.reservedOpNames languageDef
84commentLine = Pa.commentLine languageDef
85commentStart = Pa.commentStart languageDef
86commentEnd = Pa.commentEnd languageDef
87identStart = letter <|> char '_' -- '_' is included also
88identLetter = alphaNum <|> oneOf "_'#"
89opStart = oneOf ":!#$%&*+./<=>?@\\^|-~"
90opStart' = oneOf "!#$%&*+./<=>?@\\^|-~"
91opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
92
82lexeme p = p <* (getPosition >>= setState >> whiteSpace) 93lexeme p = p <* (getPosition >>= setState >> whiteSpace)
83 94
84-------------------------------------------------------------------------------- names 95-------------------------------------------------------------------------------- names
@@ -202,25 +213,29 @@ switchNS = modifyLevel $ \case ExpLevel -> TypeLevel; TypeLevel -> ExpLevel
202 213
203-------------------------------------------------------------------------------- identifiers 214-------------------------------------------------------------------------------- identifiers
204 215
205check msg p m = try_ msg $ mfilter p m 216maybeStartWith p i = i <|> (:) <$> satisfy p <*> i
217
218upperCase, lowerCase, symbols, colonSymbols, backquotedIdent :: P SName
219
220upperCase = namespace >>= \ns -> tick ns <$> identifier_ (maybeStartWith (=='\'') $ if constructorNamespace ns then (:) <$> satisfy isUpper <*> many identLetter else ident) <?> "uppercase ident"
221lowerCase = namespace >>= \ns -> identifier_ (if constructorNamespace ns then (:) <$> satisfy (\c -> isLower c || c == '_') <*> many identLetter else ident) <?> "lowercase ident"
222backquotedIdent = lexeme $ try_ "backquoted ident" $ expect "reserved word" isReservedName $ char '`' *> ident <* char '`'
223symbols = operator_ ((:) <$> opStart' <*> many opLetter) <?> "symbols"
224colonSymbols = trCons <$> operator_ ((:) <$> satisfy (== ':') <*> many opLetter) <?> "op symbols"
225 where
226 trCons ":" = "Cons"
227 trCons x = x
228
229expect msg p i = i >>= \n -> if (p n) then unexpected (msg ++ " " ++ show n) else return n
206 230
207firstCaseChar ('\'': c: _) = c
208firstCaseChar (c: _) = c
209 231
210upperCase, lowerCase, symbols, colonSymbols :: P SName 232-----------------
211--upperCase NonTypeNamespace = mzero -- todo
212upperCase = namespace >>= \ns -> (if constructorNamespace ns then check "uppercase ident" (isUpper . firstCaseChar) else id) $ tick ns <$> (identifier <|> try_ "tick ident" (('\'':) <$ char '\'' <*> identifier))
213lowerCase = namespace >>= \ns -> (if constructorNamespace ns then check "lowercase ident" (isLower . firstCaseChar) else id) identifier
214 <|> try_ "underscore ident" (('_':) <$ char '_' <*> identifier)
215symbols = check "symbols" ((/=':') . head) operator
216colonSymbols = "Cons" <$ reservedOp ":" <|> check "symbols" ((==':') . head) operator
217 233
218moduleName = {-qualified_ todo-} expNS upperCase 234moduleName = {-qualified_ todo-} expNS upperCase
219patVar = lowerCase <|> "" <$ reserved "_" 235patVar = lowerCase <|> "" <$ reserved "_"
220--qIdent = {-qualified_ todo-} (lowerCase <|> upperCase)
221backquotedIdent = try_ "backquoted ident" $ lexeme $ char '`' *> ((:) <$> satisfy isAlpha <*> many (satisfy isAlphaNum)) <* char '`'
222operatorT = symbols <|> colonSymbols <|> backquotedIdent 236operatorT = symbols <|> colonSymbols <|> backquotedIdent
223varId = lowerCase <|> parens operatorT 237varId = lowerCase <|> parens operatorT
238--qIdent = {-qualified_ todo-} (lowerCase <|> upperCase)
224 239
225{- 240{-
226qualified_ id = do 241qualified_ id = do
@@ -289,7 +304,7 @@ getFixity (fm, _) n = fromMaybe (InfixL, 9) $ Map.lookup n fm
289 304
290---------------------------------------------------------------------- 305----------------------------------------------------------------------
291---------------------------------------------------------------------- 306----------------------------------------------------------------------
292-- copied from 307-- modified version of
293-- 308--
294-- Module : Text.Parsec.Token 309-- Module : Text.Parsec.Token
295-- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007 310-- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007
@@ -497,10 +512,12 @@ number base baseDigit
497reservedOp name = 512reservedOp name =
498 lexeme $ try $ 513 lexeme $ try $
499 do{ string name 514 do{ string name
500 ; notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name) 515 ; notFollowedBy opLetter <?> ("end of " ++ show name)
501 } 516 }
502 517
503operator = 518operator = operator_ oper
519
520operator_ oper =
504 lexeme $ try $ 521 lexeme $ try $
505 do{ name <- oper 522 do{ name <- oper
506 ; if (isReservedOp name) 523 ; if (isReservedOp name)
@@ -509,39 +526,29 @@ operator =
509 } 526 }
510 527
511oper = 528oper =
512 do{ c <- (opStart languageDef) 529 do{ c <- opStart
513 ; cs <- many (opLetter languageDef) 530 ; cs <- many opLetter
514 ; return (c:cs) 531 ; return (c:cs)
515 } 532 }
516 <?> "operator" 533 <?> "operator"
517 534
518isReservedOp name = 535isReservedOp name =
519 isReserved (sort (reservedOpNames languageDef)) name 536 isReserved theReservedOpNames name
520 537
538theReservedOpNames = sort reservedOpNames
521 539
522----------------------------------------------------------- 540-----------------------------------------------------------
523-- Identifiers & Reserved words 541-- Identifiers & Reserved words
524----------------------------------------------------------- 542-----------------------------------------------------------
525reserved name = 543reserved name =
526 lexeme $ try $ 544 lexeme $ try $
527 do{ caseString name 545 do{ string name
528 ; notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name) 546 ; notFollowedBy identLetter <?> ("end of " ++ show name)
529 } 547 }
530 548
531caseString name 549identifier = identifier_ ident
532 | caseSensitive languageDef = string name
533 | otherwise = do{ walk name; return name }
534 where
535 walk [] = return ()
536 walk (c:cs) = do{ caseChar c <?> msg; walk cs }
537 550
538 caseChar c | isAlpha c = char (toLower c) <|> char (toUpper c) 551identifier_ ident =
539 | otherwise = char c
540
541 msg = show name
542
543
544identifier =
545 lexeme $ try $ 552 lexeme $ try $
546 do{ name <- ident 553 do{ name <- ident
547 ; if (isReservedName name) 554 ; if (isReservedName name)
@@ -551,17 +558,14 @@ identifier =
551 558
552 559
553ident 560ident
554 = do{ c <- identStart languageDef 561 = do{ c <- identStart
555 ; cs <- many (identLetter languageDef) 562 ; cs <- many identLetter
556 ; return (c:cs) 563 ; return (c:cs)
557 } 564 }
558 <?> "identifier" 565 <?> "identifier"
559 566
560isReservedName name 567isReservedName name
561 = isReserved theReservedNames caseName 568 = isReserved theReservedNames name
562 where
563 caseName | caseSensitive languageDef = name
564 | otherwise = map toLower name
565 569
566 570
567isReserved names name 571isReserved names name
@@ -573,11 +577,7 @@ isReserved names name
573 EQ -> True 577 EQ -> True
574 GT -> False 578 GT -> False
575 579
576theReservedNames 580theReservedNames = sort reservedNames
577 | caseSensitive languageDef = sort reserved
578 | otherwise = sort . map (map toLower) $ reserved
579 where
580 reserved = reservedNames languageDef
581 581
582 582
583 583
@@ -588,46 +588,28 @@ symbol name
588 = lexeme (string name) 588 = lexeme (string name)
589 589
590whiteSpace = ignoreAbsoluteIndentation (localTokenMode (const Pa.Any) whiteSpace') 590whiteSpace = ignoreAbsoluteIndentation (localTokenMode (const Pa.Any) whiteSpace')
591whiteSpace' 591whiteSpace' = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment <?> "")
592 | noLine && noMulti = skipMany (simpleSpace <?> "")
593 | noLine = skipMany (simpleSpace <|> multiLineComment <?> "")
594 | noMulti = skipMany (simpleSpace <|> oneLineComment <?> "")
595 | otherwise = skipMany (simpleSpace <|> oneLineComment <|> multiLineComment <?> "")
596 where
597 noLine = null (commentLine languageDef)
598 noMulti = null (commentStart languageDef)
599 592
600simpleSpace = 593simpleSpace =
601 skipMany1 (satisfy isSpace) 594 skipMany1 (satisfy isSpace)
602 595
603oneLineComment = 596oneLineComment =
604 do{ try (string (commentLine languageDef)) 597 do{ try (string commentLine)
605 ; skipMany (satisfy (/= '\n')) 598 ; skipMany (satisfy (/= '\n'))
606 ; return () 599 ; return ()
607 } 600 }
608 601
609multiLineComment = 602multiLineComment =
610 do { try (string (commentStart languageDef)) 603 do { try (string commentStart)
611 ; inComment 604 ; inCommentMulti
612 } 605 }
613 606
614inComment
615 | nestedComments languageDef = inCommentMulti
616 | otherwise = inCommentSingle
617
618inCommentMulti 607inCommentMulti
619 = do{ try (string (commentEnd languageDef)) ; return () } 608 = do{ try (string commentEnd) ; return () }
620 <|> do{ multiLineComment ; inCommentMulti } 609 <|> do{ multiLineComment ; inCommentMulti }
621 <|> do{ skipMany1 (noneOf startEnd) ; inCommentMulti } 610 <|> do{ skipMany1 (noneOf startEnd) ; inCommentMulti }
622 <|> do{ oneOf startEnd ; inCommentMulti } 611 <|> do{ oneOf startEnd ; inCommentMulti }
623 <?> "end of comment" 612 <?> "end of comment"
624 where 613 where
625 startEnd = nub (commentEnd languageDef ++ commentStart languageDef) 614 startEnd = nub (commentEnd ++ commentStart)
626 615
627inCommentSingle
628 = do{ try (string (commentEnd languageDef)); return () }
629 <|> do{ skipMany1 (noneOf startEnd) ; inCommentSingle }
630 <|> do{ oneOf startEnd ; inCommentSingle }
631 <?> "end of comment"
632 where
633 startEnd = nub (commentEnd languageDef ++ commentStart languageDef)