mark unmatched quotes, trimmed comments
This commit is contained in:
parent
20f968dba5
commit
5a5ae4d92d
1 changed files with 68 additions and 98 deletions
|
@ -1,10 +1,3 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
|
||||||
|
|
||||||
-- I somewhat believe this can be done much better
|
|
||||||
-- with Monads or Arrows and an eDSL. Parsers like
|
|
||||||
-- Parsec are already half way there but I just
|
|
||||||
-- can't wrap my head around how to adapt the
|
|
||||||
-- concept to our use case.
|
|
||||||
module Fontifier (fontMap, fontify, Fontification(..)) where
|
module Fontifier (fontMap, fontify, Fontification(..)) where
|
||||||
|
|
||||||
{---- Imports ----}
|
{---- Imports ----}
|
||||||
|
@ -26,9 +19,13 @@ data Fontification = FT { ftStart :: Int -- start index in string
|
||||||
, ftAttr :: AttrName -- attribute name
|
, ftAttr :: AttrName -- attribute name
|
||||||
} deriving (Show)
|
} deriving (Show)
|
||||||
|
|
||||||
-- TODO we could re-structure functions below to make
|
type Fontifications = [Fontification]
|
||||||
-- more use of custom types and buy us some more
|
type Fontifier = (CheckString -> Fontifications)
|
||||||
-- guarantees from the compiler
|
type Slice = (Int,Int)
|
||||||
|
type Slices = [Slice]
|
||||||
|
type MatchResult = [Tdfa.MatchArray]
|
||||||
|
type RegexStr = String
|
||||||
|
type CheckString = String
|
||||||
|
|
||||||
{---- Functions ----}
|
{---- Functions ----}
|
||||||
|
|
||||||
|
@ -42,146 +39,119 @@ fontMap = [ (attrName "assignment" , fg Vty.blue)
|
||||||
, (attrName "err_string" , Vty.withStyle (fg Vty.red) Vty.underline) -- unterminated strings
|
, (attrName "err_string" , Vty.withStyle (fg Vty.red) Vty.underline) -- unterminated strings
|
||||||
]
|
]
|
||||||
|
|
||||||
-- calls the fontification functions
|
fontify :: CheckString -> Fontifications
|
||||||
-- and returns a list of attributes for text slices.
|
|
||||||
-- Fontification functions must take an IsString
|
|
||||||
-- (either a ByteString, Text or normal String) and
|
|
||||||
-- return a [Fontification] list
|
|
||||||
fontify :: String -> [Fontification]
|
|
||||||
fontify = concatApply [ assignments, returns
|
fontify = concatApply [ assignments, returns
|
||||||
, strings
|
, strings
|
||||||
, guards, comments
|
, guards, comments
|
||||||
, mismatchedBrackets
|
, mismatchedBrackets
|
||||||
|
, untermStrings2
|
||||||
, untermStrings]
|
, untermStrings]
|
||||||
|
|
||||||
-- TODO: CLEAN UP ALL THAT LENGHTY COMMENTS
|
concatApply :: [Fontifier] -> CheckString -> Fontifications
|
||||||
|
|
||||||
-- applies the functions in order
|
|
||||||
-- the fontification of the last function overrules
|
|
||||||
-- all previous ones in case more than one matches for a slice.
|
|
||||||
-- The last function has the highest precedence.
|
|
||||||
-- This can be useful to make e.g. comments "stronger" than strings:
|
|
||||||
-- A string in a comment is then fontified as comment.
|
|
||||||
-- This works because [Fontification] is applied in order in Gui.hs.
|
|
||||||
-- We /could/ also enforce this by a special type for [Fontification]
|
|
||||||
concatApply :: [(String -> [Fontification])] -> String -> [Fontification]
|
|
||||||
concatApply [] _ = []
|
concatApply [] _ = []
|
||||||
concatApply (f:fs) s = (f $ fromString s) ++ concatApply fs s
|
concatApply (f:fs) s = f s ++ concatApply fs s
|
||||||
|
|
||||||
-- the same as concatApply just that the first function
|
|
||||||
-- in the list has the highest precedence
|
|
||||||
concatRevply :: [(String -> [Fontification])] -> String -> [Fontification]
|
|
||||||
concatRevply fs = concatApply (reverse fs)
|
|
||||||
|
|
||||||
-- TODO: most of the .+ don't actually allow any whitespace-like character
|
-- TODO: most of the .+ don't actually allow any whitespace-like character
|
||||||
-- maybe we should use \w instead?
|
-- maybe we should use \w instead?
|
||||||
-- regexes can be improved in many ways i think
|
|
||||||
-- TODO2: We could pack the "attr" "regex" into a type and run through
|
|
||||||
-- them in a function
|
|
||||||
-- -> 1. The definitions would be in one place and not spilled
|
|
||||||
-- over the whole file in these mini-functions
|
|
||||||
-- -> 2. We could make Fontifier.hs even more general and provide:
|
|
||||||
-- font definition, regexes and custom font functions
|
|
||||||
-- through another file. This way we could define
|
|
||||||
-- fontification for >1 grammer. Not that this is
|
|
||||||
-- demanded by the assignment ^^
|
|
||||||
|
|
||||||
assignments :: String -> [Fontification]
|
assignments :: CheckString -> Fontifications
|
||||||
assignments = matchAttrAll "assignment" "\\**.+=.+;"
|
assignments = matchAttrAll "assignment" "\\**.+=.+;"
|
||||||
|
|
||||||
guards :: String -> [Fontification]
|
guards :: CheckString -> Fontifications
|
||||||
guards = matchAttrAll "guard" "\\[.+:.+\\]"
|
guards = matchAttrAll "guard" "\\[.+:.+\\]"
|
||||||
|
|
||||||
returns :: String -> [Fontification]
|
returns :: CheckString -> Fontifications
|
||||||
returns = matchAttrAll "return" "\\^.+;"
|
returns = matchAttrAll "return" "\\^.+;"
|
||||||
|
|
||||||
strings :: String -> [Fontification]
|
strings :: CheckString -> Fontifications
|
||||||
strings = matchAttrAll "string" "\".*\""
|
strings = matchAttrAll "string" "\"[^%]*\""
|
||||||
|
|
||||||
comments :: String -> [Fontification]
|
comments :: CheckString -> Fontifications
|
||||||
comments = matchAttrAll "comment" "%.*"
|
comments = matchAttrAll "comment" "%.*"
|
||||||
|
|
||||||
matchAttrAll :: String -> String -> String -> [Fontification]
|
matchAttrAll :: String -> RegexStr -> CheckString -> Fontifications
|
||||||
matchAttrAll attribute regexString bs =
|
matchAttrAll attribute regexString bs =
|
||||||
let regex = Tdfa.makeRegex regexString :: Tdfa.Regex
|
let regex = Tdfa.makeRegex regexString :: Tdfa.Regex
|
||||||
in matchToFT (attrName attribute) $ Tdfa.matchAll regex bs
|
in matchToFT (attrName attribute) $ Tdfa.matchAll regex bs
|
||||||
|
|
||||||
-- Converts contents of a MatchArray (result of a regex matching)
|
-- Converts contents of a MatchArray (result of a regex matching)
|
||||||
-- to a fontification with the given attribute
|
-- to a fontification with the given attribute
|
||||||
matchToFT :: AttrName -> [Tdfa.MatchArray] -> [Fontification]
|
matchToFT :: AttrName -> MatchResult -> Fontifications
|
||||||
matchToFT attrName matchArray = map sliceToFT (matchesToSlices matchArray)
|
matchToFT attrName matchArray = map sliceToFT (matchesToSlices matchArray)
|
||||||
where sliceToFT (start,end) = FT start end attrName
|
where sliceToFT (start,end) = FT start end attrName
|
||||||
|
|
||||||
matchesToSlices :: [Tdfa.MatchArray] -> [(Int,Int)]
|
matchesToSlices :: MatchResult -> Slices
|
||||||
matchesToSlices [] = []
|
matchesToSlices [] = []
|
||||||
matchesToSlices (m:ms) = let match = m ! 0 -- get 1st elem of match array (this is the whole match, which we want)
|
matchesToSlices (m:ms) = let match = m ! 0 -- get 1st elem of match array (this is the whole match, which we want)
|
||||||
in (fst match, (fst match) + (snd match)): matchesToSlices ms
|
in (fst match, (fst match) + (snd match)): matchesToSlices ms
|
||||||
|
|
||||||
-- TODO per se regexes are too weak to find matching
|
mismatchedBrackets :: CheckString -> Fontifications
|
||||||
-- things. We could use a regex lib that supports
|
|
||||||
-- backreferences to achieve this though (i think)
|
|
||||||
-- But this:
|
|
||||||
-- a) is not so easy to get completely right
|
|
||||||
-- (http://www.regular-expressions.info/backref.html)
|
|
||||||
-- b) may backtrack a lot (but is also more powerful than what we have below)
|
|
||||||
-- c) tdfa lib is one of or the most efficient lib(s) for haskell (i think)
|
|
||||||
|
|
||||||
mismatchedBrackets :: String -> [Fontification]
|
|
||||||
mismatchedBrackets s = concatMap fontifyMismatched pairs
|
mismatchedBrackets s = concatMap fontifyMismatched pairs
|
||||||
where fontifyMismatched = mismatched (attrName "err_brackets") s
|
where fontifyMismatched = mismatched (attrName "err_brackets") s
|
||||||
pairs = [('(',')'),('[',']'),('{','}')]
|
pairs = [('(',')'),('[',']'),('{','}')]
|
||||||
|
|
||||||
mismatched :: AttrName -> String -> (Char, Char) -> [Fontification]
|
mismatched :: AttrName -> CheckString -> (Char, Char) -> Fontifications
|
||||||
mismatched attrName s (open, close) = slicesToFT attrName $ (fst orphans)++(snd orphans)
|
mismatched attrName s (open, close) = slicesToFT attrName $ (fst orphans)++(snd orphans)
|
||||||
where orphans = mismatched' s 0 ([],[])
|
where orphans = mismatched' s 0 ([],[])
|
||||||
stringsFts = strings s -- this is ugly
|
stringsFts = strings s
|
||||||
commentsFts = comments s -- this is ugly
|
commentsFts = comments s
|
||||||
mismatched' [] _ ophs = ophs
|
mismatched' [] _ ophs = ophs
|
||||||
mismatched' (curChr:restStr) ix ophs
|
mismatched' (curChr:restStr) ix ophs
|
||||||
-- 1. this is ugly, 2. if it's already fontified, go on
|
| or $ map (isFontified (ix,ix+1)) [stringsFts, commentsFts] = mismatched' restStr (ix+1) ophs -- already fontified? -> continue
|
||||||
| or $ map (isFontified (ix,ix+1)) [stringsFts, commentsFts] = mismatched' restStr (ix+1) ophs
|
| curChr == open = mismatched' restStr (ix+1) ((ix, ix+1):(fst ophs), snd ophs) -- open? -> assume orphan
|
||||||
-- if we encounter an open it is an orphan until we find a close
|
|
||||||
-- chars are always 1 long -> (ix,ix+1)
|
|
||||||
-- use of (:) reverses the occurences but is more efficient than (++)
|
|
||||||
| curChr == open = mismatched' restStr (ix+1) ((ix, ix+1):(fst ophs), snd ophs)
|
|
||||||
-- if we encounter a close delete nearest open or mark as orphan itself
|
|
||||||
| curChr == close = case null (fst ophs) of
|
| curChr == close = case null (fst ophs) of
|
||||||
-- if orphans is empty the close itself is an orphan (has no matching open)
|
True -> mismatched' restStr (ix+1) (fst ophs, (ix,ix+1):(snd ophs)) -- close but no orphaned open? -> fix as orphan
|
||||||
-- -> put it into mandatory (snd orphans), these are never pruned
|
False -> mismatched' restStr (ix+1) (tail (fst ophs), snd ophs) -- close and ophaned open exists? -> delete nearest orphaned open
|
||||||
-- -> this is imo the ugliest thing about this function, but I don't know how to
|
| otherwise = mismatched' restStr (ix+1) ophs -- neither open nor close? -> continue
|
||||||
-- solve it nicely
|
|
||||||
True -> mismatched' restStr (ix+1) (fst ophs, (ix,ix+1):(snd ophs))
|
|
||||||
-- occurence of close deletes the closest open from orphans (strips
|
|
||||||
-- head of orphans because we reversed the occurences in | s==open = ...)
|
|
||||||
False -> mismatched' restStr (ix+1) (tail (fst ophs), snd ophs)
|
|
||||||
-- current char neither open nor close -> not relevant for us
|
|
||||||
| otherwise = mismatched' restStr (ix+1) ophs
|
|
||||||
|
|
||||||
|
|
||||||
untermStrings :: String -> [Fontification]
|
untermStrings :: CheckString -> Fontifications
|
||||||
untermStrings s = slicesToFT (attrName "err_string") $ lastChar untermSlices
|
untermStrings s = slicesToFT (attrName "err_string") $ lastChar untermed
|
||||||
where commentsFts = comments s
|
where regex = Tdfa.makeRegex ("\"[^%]*\"[^\\;]"::String) :: Tdfa.Regex
|
||||||
regex = Tdfa.makeRegex ("\".*\"[^\\;]"::String) :: Tdfa.Regex
|
commentsFts = comments s
|
||||||
untermSlices = prune $ matchesToSlices $ Tdfa.matchAll regex s
|
-- strip % from commentsFts
|
||||||
prune [] = []
|
commentsFts' [] = []
|
||||||
prune (unterm: unterms)
|
commentsFts' (ft: fts) = FT (ftStart ft + 1) (ftEnd ft) (ftAttr ft): commentsFts' fts
|
||||||
| isFontified unterm commentsFts = prune unterms
|
untermed = prune commentsFts $ matchesToSlices $ Tdfa.matchAll regex s
|
||||||
| otherwise = unterm: prune unterms
|
|
||||||
-- gets the slice containing the char after an untermed string (can be
|
untermStrings2 :: CheckString -> Fontifications
|
||||||
-- special like newline too)
|
untermStrings2 s = slicesToFT (attrName "err_string") $ firstChar untermed
|
||||||
lastChar [] = []
|
where regex = Tdfa.makeRegex ("\""::String) :: Tdfa.Regex
|
||||||
lastChar (unterm:unterms) = let end = snd unterm
|
matchedSlices = matchesToSlices $ Tdfa.matchAll regex s
|
||||||
in (end-1, end): lastChar unterms
|
untermed = let commentsFts = comments s; stringsFts = strings s
|
||||||
|
in negPrune (commentsFts++stringsFts) matchedSlices
|
||||||
|
negPrune _ [] = []
|
||||||
|
negPrune checkFts (curSlice:slices)
|
||||||
|
| not $ isFontified curSlice checkFts = curSlice: negPrune checkFts slices
|
||||||
|
| otherwise = negPrune checkFts slices
|
||||||
|
|
||||||
|
prune :: Fontifications -> Slices -> Slices
|
||||||
|
prune _ [] = []
|
||||||
|
prune checkFts (curSlice: slices)
|
||||||
|
| isFontified curSlice checkFts = prune checkFts slices
|
||||||
|
| otherwise = curSlice: prune checkFts slices
|
||||||
|
|
||||||
|
-- gets the slice containing the last char of a slice
|
||||||
|
lastChar :: Slices -> Slices
|
||||||
|
lastChar [] = []
|
||||||
|
lastChar (curSlice:slices) = let end = snd curSlice
|
||||||
|
in (end-1, end): lastChar slices
|
||||||
|
|
||||||
|
-- gets the slice containing the first char of a slice
|
||||||
|
firstChar :: Slices -> Slices
|
||||||
|
firstChar [] = []
|
||||||
|
firstChar (curSlice:slices) = let start = fst curSlice
|
||||||
|
in (start, start+1): firstChar slices
|
||||||
|
|
||||||
|
|
||||||
slicesToFT :: AttrName -> [(Int,Int)] -> [Fontification]
|
slicesToFT :: AttrName -> Slices -> Fontifications
|
||||||
slicesToFT attrName slices = slicesToFT' slices
|
slicesToFT attrName slices = slicesToFT' slices
|
||||||
where slicesToFT' [] = []
|
where slicesToFT' [] = []
|
||||||
slicesToFT' (m:ms) = let start = fst m; end = snd m
|
slicesToFT' (m:ms) = let start = fst m; end = snd m
|
||||||
in (FT start end attrName) : slicesToFT' ms
|
in (FT start end attrName) : slicesToFT' ms
|
||||||
|
|
||||||
-- checks if a slice is already contained in a fontification
|
-- checks if a slice is already contained in a fontification
|
||||||
isFontified :: (Int,Int) -> [Fontification] -> Bool
|
isFontified :: Slice -> Fontifications -> Bool
|
||||||
isFontified _ [] = False
|
isFontified _ [] = False
|
||||||
isFontified slice@(start,end) (f:fs)
|
isFontified slice@(start,end) (f:fs)
|
||||||
| ftStart f <= start && ftEnd f >= end = True
|
| ftStart f <= start && ftEnd f >= end = True
|
||||||
|
|
Loading…
Reference in a new issue