1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module GrepNested
( grepCComments
, grepNested
, CommentProtocol(..)
, CommentToken(..)
) where
import Control.Arrow
import Data.List
import Data.String
import qualified Data.ByteString.Lazy.Char8 as BL
grepCComments :: Num a => a -> Int -> String -> [(a, Int, String)]
grepCComments !lin !col = grepNested ccomments lin col
data CommentToken = NullInput -- ^ Empty string.
| OpenNested -- ^ Open nested comment.
| CloseNested -- ^ Close nested comment.
| StartLine -- ^ Open line-comment.
| EOL -- ^ Close line-comment.
| IgnoredChar Int -- ^ Miscellaneous character. Argument is column width (e.g. 8 for tabs, 1 otherwise).
data CommentProtocol a = CommentProtocol
{ cmtToken :: a -> (CommentToken,Int,a)
, cmtSplitAt :: Int -> a -> (a,a)
}
ccomments :: CommentProtocol String
ccomments = CommentProtocol
{ cmtToken = \case
[] -> (NullInput , 0 , "")
'/':'*':s -> (OpenNested , 2 , s)
'/':'/':s -> (StartLine , 2 , s)
'*':'/':s -> (CloseNested , 2 , s)
'\n':s -> (EOL , 1 , s)
'\t':s -> (IgnoredChar 8 , 1 , s)
_:s -> (IgnoredChar 1 , 1 , s)
, cmtSplitAt = splitAt
}
ccommentsBL :: CommentProtocol BL.ByteString
ccommentsBL = CommentProtocol
{ cmtToken = \s ->
if BL.null s then (NullInput,0,s)
else if BL.isPrefixOf "/*" s then (OpenNested , 2, BL.drop 2 s)
else if BL.isPrefixOf "*/" s then (CloseNested , 2, BL.drop 2 s)
else if BL.isPrefixOf "//" s then (StartLine , 2, BL.drop 2 s)
else if BL.isPrefixOf "\n" s then (EOL , 1, BL.drop 1 s)
else (IgnoredChar (if BL.head s=='\t' then 8 else 1),1,BL.drop 1 s)
, cmtSplitAt = BL.splitAt . fromIntegral
}
grepNested :: Num a =>
CommentProtocol c -> a -> Int -> c -> [(a, Int, c)]
grepNested p !lin !col s = case cmtToken p s of
(OpenNested,tlen,cs)
-> let (lcnt,col',bcnt,clserlen) = findCloser p 1 (0,col,0) cs
(xs,cs') = case cmtSplitAt p (bcnt-clserlen) cs of
(s1,s2) | (CloseNested,l,s3) <- cmtToken p s2
-> (s1, snd . cmtSplitAt p clserlen $ s2)
_ -> cmtSplitAt p bcnt cs
in mkComment lin col xs : grepNested p (lin + lcnt) col' cs'
(StartLine,tlen,cs)
-> let findEOL !tot xs = case cmtToken p xs of
(EOL,tlen,_) -> (tot , tlen )
(tok,tlen,ys) -> findEOL (tot + tlen) ys
(clen,dlen) = findEOL 0 cs
(comment,ds) = cmtSplitAt p clen cs
(_,es) = cmtSplitAt p dlen ds
in mkComment lin col comment : grepNested p (lin + 1) 1 es
(EOL,tlen,cs) -> grepNested p (lin+1) 1 cs
(NullInput,tlen,_) -> []
(IgnoredChar clen,_,cs) -> grepNested p lin (col+clen) cs
findCloser :: Num lin => CommentProtocol t
-> Int -- ^ Nested comment depth, used by recursive calls, pass 1.
-> (lin, Int, Int) -- ^ Accumulated result.
-> t
-> (lin, Int, Int, Int) -- ^ (line,col,bytes,closer length)
findCloser p !d (l,c,b) s = l `seq` c `seq` b `seq` case cmtToken p s of
(NullInput,tlen,_) -> (l,c,b,tlen)
(CloseNested,tlen,xs) -> if d==1 then (l,c+tlen,b+tlen,tlen)
else findCloser p (d - 1) (l,c+tlen,b+tlen) xs
(OpenNested,tlen,xs) -> findCloser p (d + 1) (l,c+tlen,b+tlen) xs
(EOL,tlen,xs) -> findCloser p d (l+1,1,b+tlen) xs
(IgnoredChar clen,tlen,xs) -> findCloser p d (l,c+clen,b+tlen) xs
(StartLine,tlen,xs) -> findCloser p d (l,c+tlen,b+tlen) xs
mkComment :: a -> b -> c -> (a, b, c)
mkComment lin no str = (lin,no,str)
|