diff options
Diffstat (limited to 'GrepNested.hs')
-rw-r--r-- | GrepNested.hs | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/GrepNested.hs b/GrepNested.hs new file mode 100644 index 0000000..c62262f --- /dev/null +++ b/GrepNested.hs | |||
@@ -0,0 +1,93 @@ | |||
1 | {-# LANGUAGE BangPatterns #-} | ||
2 | {-# LANGUAGE LambdaCase #-} | ||
3 | {-# LANGUAGE OverloadedStrings #-} | ||
4 | module GrepNested | ||
5 | ( grepCComments | ||
6 | , grepNested | ||
7 | , CommentProtocol(..) | ||
8 | , CommentToken(..) | ||
9 | ) where | ||
10 | |||
11 | import Control.Arrow | ||
12 | import Data.List | ||
13 | import Data.String | ||
14 | import qualified Data.ByteString.Lazy.Char8 as BL | ||
15 | |||
16 | grepCComments :: Num a => a -> Int -> String -> [(a, Int, String)] | ||
17 | grepCComments !lin !col = grepNested ccomments lin col | ||
18 | |||
19 | data CommentToken = NullInput -- ^ Empty string. | ||
20 | | OpenNested -- ^ Open nested comment. | ||
21 | | CloseNested -- ^ Close nested comment. | ||
22 | | StartLine -- ^ Open line-comment. | ||
23 | | EOL -- ^ Close line-comment. | ||
24 | | IgnoredChar Int -- ^ Miscellaneous character. Argument is column width (e.g. 8 for tabs, 1 otherwise). | ||
25 | |||
26 | data CommentProtocol a = CommentProtocol | ||
27 | { cmtToken :: a -> (CommentToken,Int,a) | ||
28 | , cmtSplitAt :: Int -> a -> (a,a) | ||
29 | } | ||
30 | |||
31 | ccomments :: CommentProtocol String | ||
32 | ccomments = CommentProtocol | ||
33 | { cmtToken = \case | ||
34 | [] -> (NullInput , 0 , "") | ||
35 | '/':'*':s -> (OpenNested , 2 , s) | ||
36 | '/':'/':s -> (StartLine , 2 , s) | ||
37 | '*':'/':s -> (CloseNested , 2 , s) | ||
38 | '\n':s -> (EOL , 1 , s) | ||
39 | '\t':s -> (IgnoredChar 8 , 1 , s) | ||
40 | _:s -> (IgnoredChar 1 , 1 , s) | ||
41 | , cmtSplitAt = splitAt | ||
42 | } | ||
43 | |||
44 | ccommentsBL :: CommentProtocol BL.ByteString | ||
45 | ccommentsBL = CommentProtocol | ||
46 | { cmtToken = \s -> | ||
47 | if BL.null s then (NullInput,0,s) | ||
48 | else if BL.isPrefixOf "/*" s then (OpenNested , 2, BL.drop 2 s) | ||
49 | else if BL.isPrefixOf "*/" s then (CloseNested , 2, BL.drop 2 s) | ||
50 | else if BL.isPrefixOf "//" s then (StartLine , 2, BL.drop 2 s) | ||
51 | else if BL.isPrefixOf "\n" s then (EOL , 1, BL.drop 1 s) | ||
52 | else (IgnoredChar (if BL.head s=='\t' then 8 else 1),1,BL.drop 1 s) | ||
53 | , cmtSplitAt = BL.splitAt . fromIntegral | ||
54 | } | ||
55 | |||
56 | grepNested :: Num a => | ||
57 | CommentProtocol c -> a -> Int -> c -> [(a, Int, c)] | ||
58 | grepNested p !lin !col s = case cmtToken p s of | ||
59 | (OpenNested,tlen,cs) | ||
60 | -> let (lcnt,col',bcnt,clserlen) = findCloser p 1 (0,col,0) cs | ||
61 | (xs,cs') = case cmtSplitAt p (bcnt-clserlen) cs of | ||
62 | (s1,s2) | (CloseNested,l,s3) <- cmtToken p s2 | ||
63 | -> (s1, snd . cmtSplitAt p clserlen $ s2) | ||
64 | _ -> cmtSplitAt p bcnt cs | ||
65 | in mkComment lin col xs : grepNested p (lin + lcnt) col' cs' | ||
66 | (StartLine,tlen,cs) | ||
67 | -> let findEOL !tot xs = case cmtToken p xs of | ||
68 | (EOL,tlen,_) -> tot | ||
69 | (tok,tlen,ys) -> findEOL (tot + tlen) ys | ||
70 | (comment,_) = cmtSplitAt p (findEOL 0 cs) cs | ||
71 | in mkComment lin col comment : grepNested p (lin + 1) 1 cs | ||
72 | (EOL,tlen,cs) -> grepNested p (lin+1) 1 cs | ||
73 | (NullInput,tlen,_) -> [] | ||
74 | (IgnoredChar clen,_,cs) -> grepNested p lin (col+clen) cs | ||
75 | |||
76 | |||
77 | findCloser :: Num lin => CommentProtocol t | ||
78 | -> Int -- ^ Nested comment depth, used by recursive calls, pass 1. | ||
79 | -> (lin, Int, Int) -- ^ Accumulated result. | ||
80 | -> t | ||
81 | -> (lin, Int, Int, Int) -- ^ (line,col,bytes,closer length) | ||
82 | findCloser p !d (l,c,b) s = l `seq` c `seq` b `seq` case cmtToken p s of | ||
83 | (NullInput,tlen,_) -> (l,c,b,tlen) | ||
84 | (CloseNested,tlen,xs) -> if d==1 then (l,c+tlen,b+tlen,tlen) | ||
85 | else findCloser p (d - 1) (l,c+tlen,b+tlen) xs | ||
86 | (OpenNested,tlen,xs) -> findCloser p (d + 1) (l,c+tlen,b+tlen) xs | ||
87 | (EOL,tlen,xs) -> findCloser p d (l+1,1,b+tlen) xs | ||
88 | (IgnoredChar clen,tlen,xs) -> findCloser p d (l,c+clen,b+tlen) xs | ||
89 | (StartLine,tlen,xs) -> findCloser p d (l,c+tlen,b+tlen) xs | ||
90 | |||
91 | |||
92 | mkComment :: a -> b -> c -> (a, b, c) | ||
93 | mkComment lin no str = (lin,no,str) | ||