summaryrefslogtreecommitdiff
path: root/GrepNested.hs
diff options
context:
space:
mode:
Diffstat (limited to 'GrepNested.hs')
-rw-r--r--GrepNested.hs93
1 files changed, 93 insertions, 0 deletions
diff --git a/GrepNested.hs b/GrepNested.hs
new file mode 100644
index 0000000..c62262f
--- /dev/null
+++ b/GrepNested.hs
@@ -0,0 +1,93 @@
1{-# LANGUAGE BangPatterns #-}
2{-# LANGUAGE LambdaCase #-}
3{-# LANGUAGE OverloadedStrings #-}
4module GrepNested
5 ( grepCComments
6 , grepNested
7 , CommentProtocol(..)
8 , CommentToken(..)
9 ) where
10
11import Control.Arrow
12import Data.List
13import Data.String
14import qualified Data.ByteString.Lazy.Char8 as BL
15
16grepCComments :: Num a => a -> Int -> String -> [(a, Int, String)]
17grepCComments !lin !col = grepNested ccomments lin col
18
19data CommentToken = NullInput -- ^ Empty string.
20 | OpenNested -- ^ Open nested comment.
21 | CloseNested -- ^ Close nested comment.
22 | StartLine -- ^ Open line-comment.
23 | EOL -- ^ Close line-comment.
24 | IgnoredChar Int -- ^ Miscellaneous character. Argument is column width (e.g. 8 for tabs, 1 otherwise).
25
26data CommentProtocol a = CommentProtocol
27 { cmtToken :: a -> (CommentToken,Int,a)
28 , cmtSplitAt :: Int -> a -> (a,a)
29 }
30
31ccomments :: CommentProtocol String
32ccomments = CommentProtocol
33 { cmtToken = \case
34 [] -> (NullInput , 0 , "")
35 '/':'*':s -> (OpenNested , 2 , s)
36 '/':'/':s -> (StartLine , 2 , s)
37 '*':'/':s -> (CloseNested , 2 , s)
38 '\n':s -> (EOL , 1 , s)
39 '\t':s -> (IgnoredChar 8 , 1 , s)
40 _:s -> (IgnoredChar 1 , 1 , s)
41 , cmtSplitAt = splitAt
42 }
43
44ccommentsBL :: CommentProtocol BL.ByteString
45ccommentsBL = CommentProtocol
46 { cmtToken = \s ->
47 if BL.null s then (NullInput,0,s)
48 else if BL.isPrefixOf "/*" s then (OpenNested , 2, BL.drop 2 s)
49 else if BL.isPrefixOf "*/" s then (CloseNested , 2, BL.drop 2 s)
50 else if BL.isPrefixOf "//" s then (StartLine , 2, BL.drop 2 s)
51 else if BL.isPrefixOf "\n" s then (EOL , 1, BL.drop 1 s)
52 else (IgnoredChar (if BL.head s=='\t' then 8 else 1),1,BL.drop 1 s)
53 , cmtSplitAt = BL.splitAt . fromIntegral
54 }
55
56grepNested :: Num a =>
57 CommentProtocol c -> a -> Int -> c -> [(a, Int, c)]
58grepNested p !lin !col s = case cmtToken p s of
59 (OpenNested,tlen,cs)
60 -> let (lcnt,col',bcnt,clserlen) = findCloser p 1 (0,col,0) cs
61 (xs,cs') = case cmtSplitAt p (bcnt-clserlen) cs of
62 (s1,s2) | (CloseNested,l,s3) <- cmtToken p s2
63 -> (s1, snd . cmtSplitAt p clserlen $ s2)
64 _ -> cmtSplitAt p bcnt cs
65 in mkComment lin col xs : grepNested p (lin + lcnt) col' cs'
66 (StartLine,tlen,cs)
67 -> let findEOL !tot xs = case cmtToken p xs of
68 (EOL,tlen,_) -> tot
69 (tok,tlen,ys) -> findEOL (tot + tlen) ys
70 (comment,_) = cmtSplitAt p (findEOL 0 cs) cs
71 in mkComment lin col comment : grepNested p (lin + 1) 1 cs
72 (EOL,tlen,cs) -> grepNested p (lin+1) 1 cs
73 (NullInput,tlen,_) -> []
74 (IgnoredChar clen,_,cs) -> grepNested p lin (col+clen) cs
75
76
77findCloser :: Num lin => CommentProtocol t
78 -> Int -- ^ Nested comment depth, used by recursive calls, pass 1.
79 -> (lin, Int, Int) -- ^ Accumulated result.
80 -> t
81 -> (lin, Int, Int, Int) -- ^ (line,col,bytes,closer length)
82findCloser p !d (l,c,b) s = l `seq` c `seq` b `seq` case cmtToken p s of
83 (NullInput,tlen,_) -> (l,c,b,tlen)
84 (CloseNested,tlen,xs) -> if d==1 then (l,c+tlen,b+tlen,tlen)
85 else findCloser p (d - 1) (l,c+tlen,b+tlen) xs
86 (OpenNested,tlen,xs) -> findCloser p (d + 1) (l,c+tlen,b+tlen) xs
87 (EOL,tlen,xs) -> findCloser p d (l+1,1,b+tlen) xs
88 (IgnoredChar clen,tlen,xs) -> findCloser p d (l,c+clen,b+tlen) xs
89 (StartLine,tlen,xs) -> findCloser p d (l,c+tlen,b+tlen) xs
90
91
92mkComment :: a -> b -> c -> (a, b, c)
93mkComment lin no str = (lin,no,str)