From 5a5ae4d92dec631a8162807d4b6a930e84365135 Mon Sep 17 00:00:00 2001 From: Armin Friedl Date: Fri, 12 Aug 2016 10:23:45 +0200 Subject: [PATCH] mark unmatched quotes, trimmed comments --- src/App/Fontifier.hs | 166 ++++++++++++++++++------------------------- 1 file changed, 68 insertions(+), 98 deletions(-) diff --git a/src/App/Fontifier.hs b/src/App/Fontifier.hs index 396fd5b..751b7f3 100644 --- a/src/App/Fontifier.hs +++ b/src/App/Fontifier.hs @@ -1,10 +1,3 @@ -{-# LANGUAGE OverloadedStrings #-} - --- I somewhat believe this can be done much better --- with Monads or Arrows and an eDSL. Parsers like --- Parsec are already half way there but I just --- can't wrap my head around how to adapt the --- concept to our use case. module Fontifier (fontMap, fontify, Fontification(..)) where {---- Imports ----} @@ -26,9 +19,13 @@ data Fontification = FT { ftStart :: Int -- start index in string , ftAttr :: AttrName -- attribute name } deriving (Show) --- TODO we could re-structure functions below to make --- more use of custom types and buy us some more --- guarantees from the compiler +type Fontifications = [Fontification] +type Fontifier = (CheckString -> Fontifications) +type Slice = (Int,Int) +type Slices = [Slice] +type MatchResult = [Tdfa.MatchArray] +type RegexStr = String +type CheckString = String {---- Functions ----} @@ -42,146 +39,119 @@ fontMap = [ (attrName "assignment" , fg Vty.blue) , (attrName "err_string" , Vty.withStyle (fg Vty.red) Vty.underline) -- unterminated strings ] --- calls the fontification functions --- and returns a list of attributes for text slices. --- Fontification functions must take an IsString --- (either a ByteString, Text or normal String) and --- return a [Fontification] list -fontify :: String -> [Fontification] +fontify :: CheckString -> Fontifications fontify = concatApply [ assignments, returns , strings , guards, comments , mismatchedBrackets + , untermStrings2 , untermStrings] --- TODO: CLEAN UP ALL THAT LENGHTY COMMENTS - --- applies the functions in order --- the fontification of the last function overrules --- all previous ones in case more than one matches for a slice. --- The last function has the highest precedence. --- This can be useful to make e.g. comments "stronger" than strings: --- A string in a comment is then fontified as comment. --- This works because [Fontification] is applied in order in Gui.hs. --- We /could/ also enforce this by a special type for [Fontification] -concatApply :: [(String -> [Fontification])] -> String -> [Fontification] +concatApply :: [Fontifier] -> CheckString -> Fontifications concatApply [] _ = [] -concatApply (f:fs) s = (f $ fromString s) ++ concatApply fs s - --- the same as concatApply just that the first function --- in the list has the highest precedence -concatRevply :: [(String -> [Fontification])] -> String -> [Fontification] -concatRevply fs = concatApply (reverse fs) +concatApply (f:fs) s = f s ++ concatApply fs s -- TODO: most of the .+ don't actually allow any whitespace-like character -- maybe we should use \w instead? --- regexes can be improved in many ways i think --- TODO2: We could pack the "attr" "regex" into a type and run through --- them in a function --- -> 1. The definitions would be in one place and not spilled --- over the whole file in these mini-functions --- -> 2. We could make Fontifier.hs even more general and provide: --- font definition, regexes and custom font functions --- through another file. This way we could define --- fontification for >1 grammer. Not that this is --- demanded by the assignment ^^ -assignments :: String -> [Fontification] +assignments :: CheckString -> Fontifications assignments = matchAttrAll "assignment" "\\**.+=.+;" -guards :: String -> [Fontification] +guards :: CheckString -> Fontifications guards = matchAttrAll "guard" "\\[.+:.+\\]" -returns :: String -> [Fontification] +returns :: CheckString -> Fontifications returns = matchAttrAll "return" "\\^.+;" -strings :: String -> [Fontification] -strings = matchAttrAll "string" "\".*\"" +strings :: CheckString -> Fontifications +strings = matchAttrAll "string" "\"[^%]*\"" -comments :: String -> [Fontification] +comments :: CheckString -> Fontifications comments = matchAttrAll "comment" "%.*" -matchAttrAll :: String -> String -> String -> [Fontification] +matchAttrAll :: String -> RegexStr -> CheckString -> Fontifications matchAttrAll attribute regexString bs = let regex = Tdfa.makeRegex regexString :: Tdfa.Regex in matchToFT (attrName attribute) $ Tdfa.matchAll regex bs -- Converts contents of a MatchArray (result of a regex matching) -- to a fontification with the given attribute -matchToFT :: AttrName -> [Tdfa.MatchArray] -> [Fontification] +matchToFT :: AttrName -> MatchResult -> Fontifications matchToFT attrName matchArray = map sliceToFT (matchesToSlices matchArray) where sliceToFT (start,end) = FT start end attrName -matchesToSlices :: [Tdfa.MatchArray] -> [(Int,Int)] +matchesToSlices :: MatchResult -> Slices matchesToSlices [] = [] matchesToSlices (m:ms) = let match = m ! 0 -- get 1st elem of match array (this is the whole match, which we want) in (fst match, (fst match) + (snd match)): matchesToSlices ms --- TODO per se regexes are too weak to find matching --- things. We could use a regex lib that supports --- backreferences to achieve this though (i think) --- But this: --- a) is not so easy to get completely right --- (http://www.regular-expressions.info/backref.html) --- b) may backtrack a lot (but is also more powerful than what we have below) --- c) tdfa lib is one of or the most efficient lib(s) for haskell (i think) - -mismatchedBrackets :: String -> [Fontification] +mismatchedBrackets :: CheckString -> Fontifications mismatchedBrackets s = concatMap fontifyMismatched pairs where fontifyMismatched = mismatched (attrName "err_brackets") s pairs = [('(',')'),('[',']'),('{','}')] -mismatched :: AttrName -> String -> (Char, Char) -> [Fontification] +mismatched :: AttrName -> CheckString -> (Char, Char) -> Fontifications mismatched attrName s (open, close) = slicesToFT attrName $ (fst orphans)++(snd orphans) where orphans = mismatched' s 0 ([],[]) - stringsFts = strings s -- this is ugly - commentsFts = comments s -- this is ugly + stringsFts = strings s + commentsFts = comments s mismatched' [] _ ophs = ophs mismatched' (curChr:restStr) ix ophs - -- 1. this is ugly, 2. if it's already fontified, go on - | or $ map (isFontified (ix,ix+1)) [stringsFts, commentsFts] = mismatched' restStr (ix+1) ophs - -- if we encounter an open it is an orphan until we find a close - -- chars are always 1 long -> (ix,ix+1) - -- use of (:) reverses the occurences but is more efficient than (++) - | curChr == open = mismatched' restStr (ix+1) ((ix, ix+1):(fst ophs), snd ophs) - -- if we encounter a close delete nearest open or mark as orphan itself + | or $ map (isFontified (ix,ix+1)) [stringsFts, commentsFts] = mismatched' restStr (ix+1) ophs -- already fontified? -> continue + | curChr == open = mismatched' restStr (ix+1) ((ix, ix+1):(fst ophs), snd ophs) -- open? -> assume orphan | curChr == close = case null (fst ophs) of - -- if orphans is empty the close itself is an orphan (has no matching open) - -- -> put it into mandatory (snd orphans), these are never pruned - -- -> this is imo the ugliest thing about this function, but I don't know how to - -- solve it nicely - True -> mismatched' restStr (ix+1) (fst ophs, (ix,ix+1):(snd ophs)) - -- occurence of close deletes the closest open from orphans (strips - -- head of orphans because we reversed the occurences in | s==open = ...) - False -> mismatched' restStr (ix+1) (tail (fst ophs), snd ophs) - -- current char neither open nor close -> not relevant for us - | otherwise = mismatched' restStr (ix+1) ophs + True -> mismatched' restStr (ix+1) (fst ophs, (ix,ix+1):(snd ophs)) -- close but no orphaned open? -> fix as orphan + False -> mismatched' restStr (ix+1) (tail (fst ophs), snd ophs) -- close and ophaned open exists? -> delete nearest orphaned open + | otherwise = mismatched' restStr (ix+1) ophs -- neither open nor close? -> continue -untermStrings :: String -> [Fontification] -untermStrings s = slicesToFT (attrName "err_string") $ lastChar untermSlices - where commentsFts = comments s - regex = Tdfa.makeRegex ("\".*\"[^\\;]"::String) :: Tdfa.Regex - untermSlices = prune $ matchesToSlices $ Tdfa.matchAll regex s - prune [] = [] - prune (unterm: unterms) - | isFontified unterm commentsFts = prune unterms - | otherwise = unterm: prune unterms - -- gets the slice containing the char after an untermed string (can be - -- special like newline too) - lastChar [] = [] - lastChar (unterm:unterms) = let end = snd unterm - in (end-1, end): lastChar unterms +untermStrings :: CheckString -> Fontifications +untermStrings s = slicesToFT (attrName "err_string") $ lastChar untermed + where regex = Tdfa.makeRegex ("\"[^%]*\"[^\\;]"::String) :: Tdfa.Regex + commentsFts = comments s + -- strip % from commentsFts + commentsFts' [] = [] + commentsFts' (ft: fts) = FT (ftStart ft + 1) (ftEnd ft) (ftAttr ft): commentsFts' fts + untermed = prune commentsFts $ matchesToSlices $ Tdfa.matchAll regex s + +untermStrings2 :: CheckString -> Fontifications +untermStrings2 s = slicesToFT (attrName "err_string") $ firstChar untermed + where regex = Tdfa.makeRegex ("\""::String) :: Tdfa.Regex + matchedSlices = matchesToSlices $ Tdfa.matchAll regex s + untermed = let commentsFts = comments s; stringsFts = strings s + in negPrune (commentsFts++stringsFts) matchedSlices + negPrune _ [] = [] + negPrune checkFts (curSlice:slices) + | not $ isFontified curSlice checkFts = curSlice: negPrune checkFts slices + | otherwise = negPrune checkFts slices + +prune :: Fontifications -> Slices -> Slices +prune _ [] = [] +prune checkFts (curSlice: slices) + | isFontified curSlice checkFts = prune checkFts slices + | otherwise = curSlice: prune checkFts slices + +-- gets the slice containing the last char of a slice +lastChar :: Slices -> Slices +lastChar [] = [] +lastChar (curSlice:slices) = let end = snd curSlice + in (end-1, end): lastChar slices + +-- gets the slice containing the first char of a slice +firstChar :: Slices -> Slices +firstChar [] = [] +firstChar (curSlice:slices) = let start = fst curSlice + in (start, start+1): firstChar slices -slicesToFT :: AttrName -> [(Int,Int)] -> [Fontification] +slicesToFT :: AttrName -> Slices -> Fontifications slicesToFT attrName slices = slicesToFT' slices where slicesToFT' [] = [] slicesToFT' (m:ms) = let start = fst m; end = snd m in (FT start end attrName) : slicesToFT' ms -- checks if a slice is already contained in a fontification -isFontified :: (Int,Int) -> [Fontification] -> Bool +isFontified :: Slice -> Fontifications -> Bool isFontified _ [] = False isFontified slice@(start,end) (f:fs) | ftStart f <= start && ftEnd f >= end = True