{-# LANGUAGE BangPatterns #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} module GrepNested ( grepCComments , grepNested , CommentProtocol(..) , CommentToken(..) ) where import Control.Arrow import Data.List import Data.String import qualified Data.ByteString.Lazy.Char8 as BL grepCComments :: Num a => a -> Int -> String -> [(a, Int, String)] grepCComments !lin !col = grepNested ccomments lin col data CommentToken = NullInput -- ^ Empty string. | OpenNested -- ^ Open nested comment. | CloseNested -- ^ Close nested comment. | StartLine -- ^ Open line-comment. | EOL -- ^ Close line-comment. | IgnoredChar Int -- ^ Miscellaneous character. Argument is column width (e.g. 8 for tabs, 1 otherwise). data CommentProtocol a = CommentProtocol { cmtToken :: a -> (CommentToken,Int,a) , cmtSplitAt :: Int -> a -> (a,a) } ccomments :: CommentProtocol String ccomments = CommentProtocol { cmtToken = \case [] -> (NullInput , 0 , "") '/':'*':s -> (OpenNested , 2 , s) '/':'/':s -> (StartLine , 2 , s) '*':'/':s -> (CloseNested , 2 , s) '\n':s -> (EOL , 1 , s) '\t':s -> (IgnoredChar 8 , 1 , s) _:s -> (IgnoredChar 1 , 1 , s) , cmtSplitAt = splitAt } ccommentsBL :: CommentProtocol BL.ByteString ccommentsBL = CommentProtocol { cmtToken = \s -> if BL.null s then (NullInput,0,s) else if BL.isPrefixOf "/*" s then (OpenNested , 2, BL.drop 2 s) else if BL.isPrefixOf "*/" s then (CloseNested , 2, BL.drop 2 s) else if BL.isPrefixOf "//" s then (StartLine , 2, BL.drop 2 s) else if BL.isPrefixOf "\n" s then (EOL , 1, BL.drop 1 s) else (IgnoredChar (if BL.head s=='\t' then 8 else 1),1,BL.drop 1 s) , cmtSplitAt = BL.splitAt . fromIntegral } grepNested :: Num a => CommentProtocol c -> a -> Int -> c -> [(a, Int, c)] grepNested p !lin !col s = case cmtToken p s of (OpenNested,tlen,cs) -> let (lcnt,col',bcnt,clserlen) = findCloser p 1 (0,col,0) cs (xs,cs') = case cmtSplitAt p (bcnt-clserlen) cs of (s1,s2) | (CloseNested,l,s3) <- cmtToken p s2 -> (s1, snd . cmtSplitAt p clserlen $ s2) _ -> cmtSplitAt p bcnt cs in mkComment lin col xs : grepNested p (lin + lcnt) col' cs' (StartLine,tlen,cs) -> let findEOL !tot xs = case cmtToken p xs of (EOL,tlen,_) -> (tot , tlen ) (tok,tlen,ys) -> findEOL (tot + tlen) ys (clen,dlen) = findEOL 0 cs (comment,ds) = cmtSplitAt p clen cs (_,es) = cmtSplitAt p dlen ds in mkComment lin col comment : grepNested p (lin + 1) 1 es (EOL,tlen,cs) -> grepNested p (lin+1) 1 cs (NullInput,tlen,_) -> [] (IgnoredChar clen,_,cs) -> grepNested p lin (col+clen) cs (CloseNested,tlen,cs) -> grepNested p lin (col+tlen) cs findCloser :: Num lin => CommentProtocol t -> Int -- ^ Nested comment depth, used by recursive calls, pass 1. -> (lin, Int, Int) -- ^ Accumulated result. -> t -> (lin, Int, Int, Int) -- ^ (line,col,bytes,closer length) findCloser p !d (l,c,b) s = l `seq` c `seq` b `seq` case cmtToken p s of (NullInput,tlen,_) -> (l,c,b,tlen) (CloseNested,tlen,xs) -> if d==1 then (l,c+tlen,b+tlen,tlen) else findCloser p (d - 1) (l,c+tlen,b+tlen) xs (OpenNested,tlen,xs) -> findCloser p (d + 1) (l,c+tlen,b+tlen) xs (EOL,tlen,xs) -> findCloser p d (l+1,1,b+tlen) xs (IgnoredChar clen,tlen,xs) -> findCloser p d (l,c+clen,b+tlen) xs (StartLine,tlen,xs) -> findCloser p d (l,c+tlen,b+tlen) xs mkComment :: a -> b -> c -> (a, b, c) mkComment lin no str = (lin,no,str)