From 9e1579d5074f895cce9ac4a32ea5b94c70e4b269 Mon Sep 17 00:00:00 2001
From: Joe Crayne <joe@jerkface.net>
Date: Thu, 21 Mar 2019 16:28:07 -0400
Subject: While parsing coments, use width 8 tabs.

---
 Comments.hs       | 31 -------------------
 GrepNested.hs     | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 monkeypatch.cabal |  2 +-
 monkeypatch.hs    | 50 +++++++++++++++++++++++++-----
 4 files changed, 137 insertions(+), 39 deletions(-)
 delete mode 100644 Comments.hs
 create mode 100644 GrepNested.hs

diff --git a/Comments.hs b/Comments.hs
deleted file mode 100644
index 25cd7de..0000000
--- a/Comments.hs
+++ /dev/null
@@ -1,31 +0,0 @@
-{-# LANGUAGE BangPatterns             #-}
-{-# LANGUAGE LambdaCase #-}
-module Comments where
-
-import Control.Arrow
-
-parseComments :: (Num col, Num lin) => lin -> col -> [Char] -> [(lin, col, [Char])]
-parseComments !lin !col = \case
-    ('/':'*':cs) -> let (lcnt,col',bcnt) = findCloser 1 (0,col,0) cs
-                        (xs,cs') = case drop (bcnt-2) cs of
-                            '*':'/':_ -> second (drop 2) $ splitAt (bcnt-2) cs
-                            _         -> splitAt bcnt cs
-                    in mkComment lin col xs : parseComments (lin + lcnt) col' cs'
-    ('/':'/':cs) -> let (comment,ds) = break (=='\n') cs
-                    in mkComment lin col comment : parseComments (lin + 1) 1 cs
-    ('\n' : cs) -> parseComments (lin+1) 1       cs
-    (  x  : cs) -> parseComments lin     (col+1) cs
-    []          -> []
-
-
-findCloser :: (Num a4, Num a3, Num a2, Num a1, Eq a1) =>
-                    a1 -> (a4, a2, a3) -> [Char] -> (a4, a2, a3)
-findCloser !1 (l,c,b) ('*':'/':_)  = (l,c+2,b+2)
-findCloser !d (l,c,b) ('*':'/':xs) = findCloser (d - 1) (l,c+2,b+2) xs
-findCloser !d (l,c,b) ('/':'*':xs) = findCloser (d + 1) (l,c+2,b+2) xs
-findCloser !d (l,c,b) ('\n':xs)    = findCloser d (l+1,1,b+1) xs
-findCloser !d (l,c,b) (_:xs)       = findCloser d (l,c+1,b+1) xs
-findCloser !d (l,c,b) []           = (l,c,b)
-
-mkComment :: a -> b -> c -> (a, b, c)
-mkComment lin no str = (lin,no,str)
diff --git a/GrepNested.hs b/GrepNested.hs
new file mode 100644
index 0000000..c62262f
--- /dev/null
+++ b/GrepNested.hs
@@ -0,0 +1,93 @@
+{-# LANGUAGE BangPatterns      #-}
+{-# LANGUAGE LambdaCase        #-}
+{-# LANGUAGE OverloadedStrings #-}
+module GrepNested
+        ( grepCComments
+        , grepNested
+        , CommentProtocol(..)
+        , CommentToken(..)
+        ) where
+
+import Control.Arrow
+import Data.List
+import Data.String
+import qualified Data.ByteString.Lazy.Char8 as BL
+
+grepCComments :: Num a => a -> Int -> String -> [(a, Int, String)]
+grepCComments !lin !col = grepNested ccomments lin col
+
+data CommentToken = NullInput   -- ^ Empty string.
+                  | OpenNested  -- ^ Open nested comment.
+                  | CloseNested -- ^ Close nested comment.
+                  | StartLine   -- ^ Open line-comment.
+                  | EOL         -- ^ Close line-comment.
+                  | IgnoredChar Int -- ^ Miscellaneous character.  Argument is column width (e.g. 8 for tabs, 1 otherwise).
+
+data CommentProtocol a = CommentProtocol
+    { cmtToken   :: a -> (CommentToken,Int,a)
+    , cmtSplitAt :: Int -> a -> (a,a)
+    }
+
+ccomments :: CommentProtocol String
+ccomments = CommentProtocol
+    { cmtToken = \case
+        []        -> (NullInput     , 0 , "")
+        '/':'*':s -> (OpenNested    , 2 , s)
+        '/':'/':s -> (StartLine     , 2 , s)
+        '*':'/':s -> (CloseNested   , 2 , s)
+        '\n':s    -> (EOL           , 1 , s)
+        '\t':s    -> (IgnoredChar 8 , 1 , s)
+        _:s       -> (IgnoredChar 1 , 1 , s)
+    , cmtSplitAt = splitAt
+    }
+
+ccommentsBL :: CommentProtocol BL.ByteString
+ccommentsBL = CommentProtocol
+    { cmtToken = \s ->
+        if BL.null s then (NullInput,0,s)
+        else if BL.isPrefixOf "/*" s then (OpenNested  , 2, BL.drop 2 s)
+        else if BL.isPrefixOf "*/" s then (CloseNested , 2, BL.drop 2 s)
+        else if BL.isPrefixOf "//" s then (StartLine   , 2, BL.drop 2 s)
+        else if BL.isPrefixOf "\n" s then (EOL         , 1, BL.drop 1 s)
+        else (IgnoredChar (if BL.head s=='\t' then 8 else 1),1,BL.drop 1 s)
+    , cmtSplitAt = BL.splitAt . fromIntegral
+    }
+
+grepNested :: Num a =>
+              CommentProtocol c -> a -> Int -> c -> [(a, Int, c)]
+grepNested p !lin !col s = case cmtToken p s of
+    (OpenNested,tlen,cs)
+            -> let (lcnt,col',bcnt,clserlen) = findCloser p 1 (0,col,0) cs
+                   (xs,cs') = case cmtSplitAt p (bcnt-clserlen) cs of
+                       (s1,s2) | (CloseNested,l,s3) <- cmtToken p s2
+                                 -> (s1, snd . cmtSplitAt p clserlen $ s2)
+                       _         -> cmtSplitAt p bcnt cs
+               in mkComment lin col xs : grepNested p (lin + lcnt) col' cs'
+    (StartLine,tlen,cs)
+            -> let findEOL !tot xs = case cmtToken p xs of
+                                        (EOL,tlen,_)  -> tot
+                                        (tok,tlen,ys) -> findEOL (tot + tlen) ys
+                   (comment,_) = cmtSplitAt p (findEOL 0 cs) cs
+               in mkComment lin col comment : grepNested p (lin + 1) 1 cs
+    (EOL,tlen,cs)              -> grepNested p (lin+1) 1 cs
+    (NullInput,tlen,_)         -> []
+    (IgnoredChar clen,_,cs) -> grepNested p lin (col+clen) cs
+
+
+findCloser :: Num lin => CommentProtocol t
+                         -> Int             -- ^ Nested comment depth, used by recursive calls, pass 1.
+                         -> (lin, Int, Int) -- ^ Accumulated result.
+                         -> t
+                         -> (lin, Int, Int, Int) -- ^ (line,col,bytes,closer length)
+findCloser p !d (l,c,b) s = l `seq` c `seq` b `seq` case cmtToken p s of
+    (NullInput,tlen,_)    -> (l,c,b,tlen)
+    (CloseNested,tlen,xs) -> if d==1 then (l,c+tlen,b+tlen,tlen)
+                                     else findCloser p (d - 1) (l,c+tlen,b+tlen) xs
+    (OpenNested,tlen,xs)  -> findCloser p (d + 1) (l,c+tlen,b+tlen) xs
+    (EOL,tlen,xs)         -> findCloser p d (l+1,1,b+tlen) xs
+    (IgnoredChar clen,tlen,xs) -> findCloser p d (l,c+clen,b+tlen) xs
+    (StartLine,tlen,xs) -> findCloser p d (l,c+tlen,b+tlen) xs
+
+
+mkComment :: a -> b -> c -> (a, b, c)
+mkComment lin no str = (lin,no,str)
diff --git a/monkeypatch.cabal b/monkeypatch.cabal
index ac31a6d..23bc405 100644
--- a/monkeypatch.cabal
+++ b/monkeypatch.cabal
@@ -52,7 +52,7 @@ extra-source-files:  CHANGELOG.md
 
 executable monkeypatch
   main-is:             monkeypatch.hs
-  other-modules:       Comments
+  other-modules:       GrepNested
   -- other-extensions:
   build-depends:       base >=4.10.1.0  && <=4.12
                      , containers ^>=0.5.10.2
diff --git a/monkeypatch.hs b/monkeypatch.hs
index 461d3c1..ea97252 100644
--- a/monkeypatch.hs
+++ b/monkeypatch.hs
@@ -24,6 +24,7 @@ import qualified Data.IntMap           as IntMap
 import qualified Data.Map           as Map
          ;import Data.Map           (Map)
 import Data.Maybe
+import Data.Ord
 import qualified Data.Set           as Set
          ;import Data.Set           (Set)
 import Language.C.Data.Ident        as C
@@ -47,7 +48,7 @@ import Text.PrettyPrint             (Doc, doubleQuotes, empty, text, vcat, ($$),
                                      (<+>))
 import Text.Show.Pretty
 
-import Comments
+import GrepNested
 
 trace :: p -> a -> a
 trace _ = id
@@ -480,6 +481,31 @@ isFunctionDecl _ = False
 cleanTree :: (Functor f, Data (f b)) => f b -> f Ordering
 cleanTree d = fmap (const LT) $ everywhere (mkT eraseNodeInfo) $ d
 
+
+data SymbolExtent = SymbolExtent { startExtent :: Position, stopExtent :: Position }
+
+instance Show SymbolExtent where
+    show (SymbolExtent a b) = "sed -n "++show al++","++show bl++"p "++fn
+        where
+            fn = posFile a
+            al = posRow a
+            bl = posRow b
+
+getSymbolExtent :: (CNode a1, Data a2) =>
+                   SymbolInformation a2 -- ^ Symbol database record (symbolSource must have Postion data in it).
+                   -> [a1]              -- ^ function body (only used for filename)
+                   -> SymbolExtent
+getSymbolExtent sym bdy =
+    -- TODO: This could probably be a lot more efficient using NodeInfo's PosLength field.
+    let bdy_poss = map (posOfNode . nodeInfo) bdy
+        -- hpos = map (posOfNode . nodeInfo) (symbolSource sym)
+        cmodule = map posFile (take 1 $ filter isSourcePos $ bdy_poss) -- TODO: What if first statement is provided by macro?
+        allposss = listify (\p -> case cmodule of { [f] | isSourcePos p -> posFile p == f ; _ -> isSourcePos p }) (symbolSource sym) :: [Position]
+        start = minimumBy (comparing posRow) allposss
+        stop  = maximumBy (comparing posRow) allposss
+    in SymbolExtent start stop
+
+
 transpile :: C2HaskellOptions -> FilePath -> IncludeStack -> CTranslationUnit NodeInfo -> IO ()
 transpile o fname incs (CTranslUnit edecls _) = do
     let db = foldr update initTranspile edecls
@@ -529,6 +555,15 @@ transpile o fname incs (CTranslUnit edecls _) = do
 
                                         Nothing -> putStrLn $ "??: " ++ ppShow (cleanTree d)
                                     putStrLn ""
+                        let extent = getSymbolExtent sym bdy
+                        cs <- readComments (posFile $ startExtent extent)
+                        let (pcs,cs') = seekComment (startExtent extent) cs
+                            (cs'',ds) = seekComment (stopExtent extent) cs'
+                        -- lineOfComment
+                        forM_ cs' $ \(_,col,cmt) -> do
+                            putStrLn ""
+                            putStrLn $ replicate col ' ' ++ cmt
+                        print extent
     return ()
 
 isHeaderDecl :: CNode a => a -> Bool
@@ -895,8 +930,9 @@ enumCases (CDeclExt (CDecl xs _ ni)) = do
 lineOfComment :: (Int, b, String) -> Int
 lineOfComment (l,_,s) = l + length (lines s)
 
-seekComment :: NodeInfo -> [(Int,Int,String)] -> ([(Int,Int,String)],[(Int,Int,String)])
-seekComment ni cs = break (\c -> lineOfComment c>=posRow (posOfNode ni)) cs
+-- Break a comment list into comments preceding the given node and comments that come after it.
+seekComment :: Position -> [(Int,Int,String)] -> ([(Int,Int,String)],[(Int,Int,String)])
+seekComment pos cs = break (\c -> lineOfComment c>=posRow pos) cs
 
 strip :: [Char] -> [Char]
 strip = reverse . dropWhile isSpace . reverse . dropWhile isSpace
@@ -955,13 +991,13 @@ c2haskell opts cs cmodname missings incs (CTranslUnit edecls _) = do
                         cs <- maybe (return []) readComments symfile
                         -- mapM_ (hPutStrLn haskmod . commented . ppShow) $ cs
                         -- mapM_ (hPutStrLn haskmod . commented . ppShow) $ symbolSource si
-                        let (_,cs') = seekComment eni cs
+                        let (_,cs') = seekComment (posOfNode eni) cs
                         forM_ (take 1 cs') $ \(_,c,s) ->
                             when (c==1) $ hPutStr haskmod $ commented $ "| " ++ strip s
                         hPutStrLn haskmod $ unwords ["newtype",t,"=",t,"CInt"]
                         forM_ (zip es [0..]) $ \((e,_),n) -> do
                             let r = posRow . posOfNode . nodeInfo $ e
-                            case seekComment (nodeInfo e) cs' of
+                            case seekComment (posOfNode $ nodeInfo e) cs' of
                                 (_,(lno,cno,s):_) | lno==r-1 && cno==1
                                                     || cno>1 && lno == r
                                   -> hPutStr haskmod $ commented $ "| " ++ strip s
@@ -1356,8 +1392,8 @@ goMissing haskmod db cfun = do
                     htyp -> hPutStr haskmod $ commented $ "Unsupported haskell type: " ++ HS.prettyPrint htyp
 
 
-readComments :: (Num lin, Num col) => FilePath -> IO [(lin, col, [Char])]
-readComments fname = parseComments 1 1 <$> readFile fname
+readComments :: Num lin => FilePath -> IO [(lin, Int, [Char])]
+readComments fname = grepCComments 1 1 <$> readFile fname
 
 sanitizeArgs :: [String] -> [String]
 sanitizeArgs (('-':'M':_):args) = sanitizeArgs args
-- 
cgit v1.2.3