summaryrefslogtreecommitdiff
path: root/GrepNested.hs
blob: 74033bd3227a9b2ce82001aa879077e2c794bd96 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
{-# LANGUAGE BangPatterns      #-}
{-# LANGUAGE LambdaCase        #-}
{-# LANGUAGE OverloadedStrings #-}
module GrepNested
        ( grepCComments
        , grepNested
        , CommentProtocol(..)
        , CommentToken(..)
        ) where

import Control.Arrow
import Data.List
import Data.String
import qualified Data.ByteString.Lazy.Char8 as BL

grepCComments :: Num a => a -> Int -> String -> [(a, Int, String)]
grepCComments !lin !col = grepNested ccomments lin col

data CommentToken = NullInput   -- ^ Empty string.
                  | OpenNested  -- ^ Open nested comment.
                  | CloseNested -- ^ Close nested comment.
                  | StartLine   -- ^ Open line-comment.
                  | EOL         -- ^ Close line-comment.
                  | IgnoredChar Int -- ^ Miscellaneous character.  Argument is column width (e.g. 8 for tabs, 1 otherwise).

data CommentProtocol a = CommentProtocol
    { cmtToken   :: a -> (CommentToken,Int,a)
    , cmtSplitAt :: Int -> a -> (a,a)
    }

ccomments :: CommentProtocol String
ccomments = CommentProtocol
    { cmtToken = \case
        []        -> (NullInput     , 0 , "")
        '/':'*':s -> (OpenNested    , 2 , s)
        '/':'/':s -> (StartLine     , 2 , s)
        '*':'/':s -> (CloseNested   , 2 , s)
        '\n':s    -> (EOL           , 1 , s)
        '\t':s    -> (IgnoredChar 8 , 1 , s)
        _:s       -> (IgnoredChar 1 , 1 , s)
    , cmtSplitAt = splitAt
    }

ccommentsBL :: CommentProtocol BL.ByteString
ccommentsBL = CommentProtocol
    { cmtToken = \s ->
        if BL.null s then (NullInput,0,s)
        else if BL.isPrefixOf "/*" s then (OpenNested  , 2, BL.drop 2 s)
        else if BL.isPrefixOf "*/" s then (CloseNested , 2, BL.drop 2 s)
        else if BL.isPrefixOf "//" s then (StartLine   , 2, BL.drop 2 s)
        else if BL.isPrefixOf "\n" s then (EOL         , 1, BL.drop 1 s)
        else (IgnoredChar (if BL.head s=='\t' then 8 else 1),1,BL.drop 1 s)
    , cmtSplitAt = BL.splitAt . fromIntegral
    }

grepNested :: Num a =>
              CommentProtocol c -> a -> Int -> c -> [(a, Int, c)]
grepNested p !lin !col s = case cmtToken p s of
    (OpenNested,tlen,cs)
            -> let (lcnt,col',bcnt,clserlen) = findCloser p 1 (0,col,0) cs
                   (xs,cs') = case cmtSplitAt p (bcnt-clserlen) cs of
                       (s1,s2) | (CloseNested,l,s3) <- cmtToken p s2
                                 -> (s1, snd . cmtSplitAt p clserlen $ s2)
                       _         -> cmtSplitAt p bcnt cs
               in mkComment lin col xs : grepNested p (lin + lcnt) col' cs'
    (StartLine,tlen,cs)
            -> let findEOL !tot xs = case cmtToken p xs of
                                        (EOL,tlen,_)  -> (tot , tlen )
                                        (tok,tlen,ys) -> findEOL (tot + tlen) ys
                   (clen,dlen) = findEOL 0 cs
                   (comment,ds) = cmtSplitAt p clen cs
                   (_,es) = cmtSplitAt p dlen ds
               in mkComment lin col comment : grepNested p (lin + 1) 1 es
    (EOL,tlen,cs)              -> grepNested p (lin+1) 1 cs
    (NullInput,tlen,_)         -> []
    (IgnoredChar clen,_,cs) -> grepNested p lin (col+clen) cs
    (CloseNested,tlen,cs)   -> grepNested p lin (col+tlen) cs


findCloser :: Num lin => CommentProtocol t
                         -> Int             -- ^ Nested comment depth, used by recursive calls, pass 1.
                         -> (lin, Int, Int) -- ^ Accumulated result.
                         -> t
                         -> (lin, Int, Int, Int) -- ^ (line,col,bytes,closer length)
findCloser p !d (l,c,b) s = l `seq` c `seq` b `seq` case cmtToken p s of
    (NullInput,tlen,_)    -> (l,c,b,tlen)
    (CloseNested,tlen,xs) -> if d==1 then (l,c+tlen,b+tlen,tlen)
                                     else findCloser p (d - 1) (l,c+tlen,b+tlen) xs
    (OpenNested,tlen,xs)  -> findCloser p (d + 1) (l,c+tlen,b+tlen) xs
    (EOL,tlen,xs)         -> findCloser p d (l+1,1,b+tlen) xs
    (IgnoredChar clen,tlen,xs) -> findCloser p d (l,c+clen,b+tlen) xs
    (StartLine,tlen,xs) -> findCloser p d (l,c+tlen,b+tlen) xs


mkComment :: a -> b -> c -> (a, b, c)
mkComment lin no str = (lin,no,str)