From 9e1579d5074f895cce9ac4a32ea5b94c70e4b269 Mon Sep 17 00:00:00 2001 From: Joe Crayne Date: Thu, 21 Mar 2019 16:28:07 -0400 Subject: While parsing coments, use width 8 tabs. --- Comments.hs | 31 ------------------- GrepNested.hs | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ monkeypatch.cabal | 2 +- monkeypatch.hs | 50 +++++++++++++++++++++++++----- 4 files changed, 137 insertions(+), 39 deletions(-) delete mode 100644 Comments.hs create mode 100644 GrepNested.hs diff --git a/Comments.hs b/Comments.hs deleted file mode 100644 index 25cd7de..0000000 --- a/Comments.hs +++ /dev/null @@ -1,31 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE LambdaCase #-} -module Comments where - -import Control.Arrow - -parseComments :: (Num col, Num lin) => lin -> col -> [Char] -> [(lin, col, [Char])] -parseComments !lin !col = \case - ('/':'*':cs) -> let (lcnt,col',bcnt) = findCloser 1 (0,col,0) cs - (xs,cs') = case drop (bcnt-2) cs of - '*':'/':_ -> second (drop 2) $ splitAt (bcnt-2) cs - _ -> splitAt bcnt cs - in mkComment lin col xs : parseComments (lin + lcnt) col' cs' - ('/':'/':cs) -> let (comment,ds) = break (=='\n') cs - in mkComment lin col comment : parseComments (lin + 1) 1 cs - ('\n' : cs) -> parseComments (lin+1) 1 cs - ( x : cs) -> parseComments lin (col+1) cs - [] -> [] - - -findCloser :: (Num a4, Num a3, Num a2, Num a1, Eq a1) => - a1 -> (a4, a2, a3) -> [Char] -> (a4, a2, a3) -findCloser !1 (l,c,b) ('*':'/':_) = (l,c+2,b+2) -findCloser !d (l,c,b) ('*':'/':xs) = findCloser (d - 1) (l,c+2,b+2) xs -findCloser !d (l,c,b) ('/':'*':xs) = findCloser (d + 1) (l,c+2,b+2) xs -findCloser !d (l,c,b) ('\n':xs) = findCloser d (l+1,1,b+1) xs -findCloser !d (l,c,b) (_:xs) = findCloser d (l,c+1,b+1) xs -findCloser !d (l,c,b) [] = (l,c,b) - -mkComment :: a -> b -> c -> (a, b, c) -mkComment lin no str = (lin,no,str) diff --git a/GrepNested.hs b/GrepNested.hs new file mode 100644 index 0000000..c62262f --- /dev/null +++ b/GrepNested.hs @@ -0,0 +1,93 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +module GrepNested + ( grepCComments + , grepNested + , CommentProtocol(..) + , CommentToken(..) + ) where + +import Control.Arrow +import Data.List +import Data.String +import qualified Data.ByteString.Lazy.Char8 as BL + +grepCComments :: Num a => a -> Int -> String -> [(a, Int, String)] +grepCComments !lin !col = grepNested ccomments lin col + +data CommentToken = NullInput -- ^ Empty string. + | OpenNested -- ^ Open nested comment. + | CloseNested -- ^ Close nested comment. + | StartLine -- ^ Open line-comment. + | EOL -- ^ Close line-comment. + | IgnoredChar Int -- ^ Miscellaneous character. Argument is column width (e.g. 8 for tabs, 1 otherwise). + +data CommentProtocol a = CommentProtocol + { cmtToken :: a -> (CommentToken,Int,a) + , cmtSplitAt :: Int -> a -> (a,a) + } + +ccomments :: CommentProtocol String +ccomments = CommentProtocol + { cmtToken = \case + [] -> (NullInput , 0 , "") + '/':'*':s -> (OpenNested , 2 , s) + '/':'/':s -> (StartLine , 2 , s) + '*':'/':s -> (CloseNested , 2 , s) + '\n':s -> (EOL , 1 , s) + '\t':s -> (IgnoredChar 8 , 1 , s) + _:s -> (IgnoredChar 1 , 1 , s) + , cmtSplitAt = splitAt + } + +ccommentsBL :: CommentProtocol BL.ByteString +ccommentsBL = CommentProtocol + { cmtToken = \s -> + if BL.null s then (NullInput,0,s) + else if BL.isPrefixOf "/*" s then (OpenNested , 2, BL.drop 2 s) + else if BL.isPrefixOf "*/" s then (CloseNested , 2, BL.drop 2 s) + else if BL.isPrefixOf "//" s then (StartLine , 2, BL.drop 2 s) + else if BL.isPrefixOf "\n" s then (EOL , 1, BL.drop 1 s) + else (IgnoredChar (if BL.head s=='\t' then 8 else 1),1,BL.drop 1 s) + , cmtSplitAt = BL.splitAt . fromIntegral + } + +grepNested :: Num a => + CommentProtocol c -> a -> Int -> c -> [(a, Int, c)] +grepNested p !lin !col s = case cmtToken p s of + (OpenNested,tlen,cs) + -> let (lcnt,col',bcnt,clserlen) = findCloser p 1 (0,col,0) cs + (xs,cs') = case cmtSplitAt p (bcnt-clserlen) cs of + (s1,s2) | (CloseNested,l,s3) <- cmtToken p s2 + -> (s1, snd . cmtSplitAt p clserlen $ s2) + _ -> cmtSplitAt p bcnt cs + in mkComment lin col xs : grepNested p (lin + lcnt) col' cs' + (StartLine,tlen,cs) + -> let findEOL !tot xs = case cmtToken p xs of + (EOL,tlen,_) -> tot + (tok,tlen,ys) -> findEOL (tot + tlen) ys + (comment,_) = cmtSplitAt p (findEOL 0 cs) cs + in mkComment lin col comment : grepNested p (lin + 1) 1 cs + (EOL,tlen,cs) -> grepNested p (lin+1) 1 cs + (NullInput,tlen,_) -> [] + (IgnoredChar clen,_,cs) -> grepNested p lin (col+clen) cs + + +findCloser :: Num lin => CommentProtocol t + -> Int -- ^ Nested comment depth, used by recursive calls, pass 1. + -> (lin, Int, Int) -- ^ Accumulated result. + -> t + -> (lin, Int, Int, Int) -- ^ (line,col,bytes,closer length) +findCloser p !d (l,c,b) s = l `seq` c `seq` b `seq` case cmtToken p s of + (NullInput,tlen,_) -> (l,c,b,tlen) + (CloseNested,tlen,xs) -> if d==1 then (l,c+tlen,b+tlen,tlen) + else findCloser p (d - 1) (l,c+tlen,b+tlen) xs + (OpenNested,tlen,xs) -> findCloser p (d + 1) (l,c+tlen,b+tlen) xs + (EOL,tlen,xs) -> findCloser p d (l+1,1,b+tlen) xs + (IgnoredChar clen,tlen,xs) -> findCloser p d (l,c+clen,b+tlen) xs + (StartLine,tlen,xs) -> findCloser p d (l,c+tlen,b+tlen) xs + + +mkComment :: a -> b -> c -> (a, b, c) +mkComment lin no str = (lin,no,str) diff --git a/monkeypatch.cabal b/monkeypatch.cabal index ac31a6d..23bc405 100644 --- a/monkeypatch.cabal +++ b/monkeypatch.cabal @@ -52,7 +52,7 @@ extra-source-files: CHANGELOG.md executable monkeypatch main-is: monkeypatch.hs - other-modules: Comments + other-modules: GrepNested -- other-extensions: build-depends: base >=4.10.1.0 && <=4.12 , containers ^>=0.5.10.2 diff --git a/monkeypatch.hs b/monkeypatch.hs index 461d3c1..ea97252 100644 --- a/monkeypatch.hs +++ b/monkeypatch.hs @@ -24,6 +24,7 @@ import qualified Data.IntMap as IntMap import qualified Data.Map as Map ;import Data.Map (Map) import Data.Maybe +import Data.Ord import qualified Data.Set as Set ;import Data.Set (Set) import Language.C.Data.Ident as C @@ -47,7 +48,7 @@ import Text.PrettyPrint (Doc, doubleQuotes, empty, text, vcat, ($$), (<+>)) import Text.Show.Pretty -import Comments +import GrepNested trace :: p -> a -> a trace _ = id @@ -480,6 +481,31 @@ isFunctionDecl _ = False cleanTree :: (Functor f, Data (f b)) => f b -> f Ordering cleanTree d = fmap (const LT) $ everywhere (mkT eraseNodeInfo) $ d + +data SymbolExtent = SymbolExtent { startExtent :: Position, stopExtent :: Position } + +instance Show SymbolExtent where + show (SymbolExtent a b) = "sed -n "++show al++","++show bl++"p "++fn + where + fn = posFile a + al = posRow a + bl = posRow b + +getSymbolExtent :: (CNode a1, Data a2) => + SymbolInformation a2 -- ^ Symbol database record (symbolSource must have Postion data in it). + -> [a1] -- ^ function body (only used for filename) + -> SymbolExtent +getSymbolExtent sym bdy = + -- TODO: This could probably be a lot more efficient using NodeInfo's PosLength field. + let bdy_poss = map (posOfNode . nodeInfo) bdy + -- hpos = map (posOfNode . nodeInfo) (symbolSource sym) + cmodule = map posFile (take 1 $ filter isSourcePos $ bdy_poss) -- TODO: What if first statement is provided by macro? + allposss = listify (\p -> case cmodule of { [f] | isSourcePos p -> posFile p == f ; _ -> isSourcePos p }) (symbolSource sym) :: [Position] + start = minimumBy (comparing posRow) allposss + stop = maximumBy (comparing posRow) allposss + in SymbolExtent start stop + + transpile :: C2HaskellOptions -> FilePath -> IncludeStack -> CTranslationUnit NodeInfo -> IO () transpile o fname incs (CTranslUnit edecls _) = do let db = foldr update initTranspile edecls @@ -529,6 +555,15 @@ transpile o fname incs (CTranslUnit edecls _) = do Nothing -> putStrLn $ "??: " ++ ppShow (cleanTree d) putStrLn "" + let extent = getSymbolExtent sym bdy + cs <- readComments (posFile $ startExtent extent) + let (pcs,cs') = seekComment (startExtent extent) cs + (cs'',ds) = seekComment (stopExtent extent) cs' + -- lineOfComment + forM_ cs' $ \(_,col,cmt) -> do + putStrLn "" + putStrLn $ replicate col ' ' ++ cmt + print extent return () isHeaderDecl :: CNode a => a -> Bool @@ -895,8 +930,9 @@ enumCases (CDeclExt (CDecl xs _ ni)) = do lineOfComment :: (Int, b, String) -> Int lineOfComment (l,_,s) = l + length (lines s) -seekComment :: NodeInfo -> [(Int,Int,String)] -> ([(Int,Int,String)],[(Int,Int,String)]) -seekComment ni cs = break (\c -> lineOfComment c>=posRow (posOfNode ni)) cs +-- Break a comment list into comments preceding the given node and comments that come after it. +seekComment :: Position -> [(Int,Int,String)] -> ([(Int,Int,String)],[(Int,Int,String)]) +seekComment pos cs = break (\c -> lineOfComment c>=posRow pos) cs strip :: [Char] -> [Char] strip = reverse . dropWhile isSpace . reverse . dropWhile isSpace @@ -955,13 +991,13 @@ c2haskell opts cs cmodname missings incs (CTranslUnit edecls _) = do cs <- maybe (return []) readComments symfile -- mapM_ (hPutStrLn haskmod . commented . ppShow) $ cs -- mapM_ (hPutStrLn haskmod . commented . ppShow) $ symbolSource si - let (_,cs') = seekComment eni cs + let (_,cs') = seekComment (posOfNode eni) cs forM_ (take 1 cs') $ \(_,c,s) -> when (c==1) $ hPutStr haskmod $ commented $ "| " ++ strip s hPutStrLn haskmod $ unwords ["newtype",t,"=",t,"CInt"] forM_ (zip es [0..]) $ \((e,_),n) -> do let r = posRow . posOfNode . nodeInfo $ e - case seekComment (nodeInfo e) cs' of + case seekComment (posOfNode $ nodeInfo e) cs' of (_,(lno,cno,s):_) | lno==r-1 && cno==1 || cno>1 && lno == r -> hPutStr haskmod $ commented $ "| " ++ strip s @@ -1356,8 +1392,8 @@ goMissing haskmod db cfun = do htyp -> hPutStr haskmod $ commented $ "Unsupported haskell type: " ++ HS.prettyPrint htyp -readComments :: (Num lin, Num col) => FilePath -> IO [(lin, col, [Char])] -readComments fname = parseComments 1 1 <$> readFile fname +readComments :: Num lin => FilePath -> IO [(lin, Int, [Char])] +readComments fname = grepCComments 1 1 <$> readFile fname sanitizeArgs :: [String] -> [String] sanitizeArgs (('-':'M':_):args) = sanitizeArgs args -- cgit v1.2.3