Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
[#64] Refactor the markdown scanner
Problem: Current implementation of the markdown scanner is hard
to extend, so we need to refactor it to add support for new annotations.

Solution: Refactor; improve handling annotations, remove IMSAll state
as it's not required, rename functions.
  • Loading branch information
YuriRomanowski committed Dec 14, 2022
commit 9c6d97cc3be72f6e337ef0d066e0813f08280ca6
2 changes: 1 addition & 1 deletion src/Xrefcheck/Core.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ import Control.Lens (makeLenses)
import Data.Aeson (FromJSON (..), withText)
import Data.Char (isAlphaNum)
import Data.Char qualified as C
import Data.Default (Default (..))
import Data.DList (DList)
import Data.DList qualified as DList
import Data.Default (Default (..))
import Data.List qualified as L
import Data.Reflection (Given)
import Data.Text qualified as T
Expand Down
174 changes: 95 additions & 79 deletions src/Xrefcheck/Scanners/Markdown.hs
Original file line number Diff line number Diff line change
Expand Up @@ -102,26 +102,24 @@ data IgnoreLinkState
-- and we should change `IgnoreLinkState`, because it's not a problem if
-- our node's first child doesn't contain a link. So this status means that
-- we won't throw errors if we don't find a link for now
deriving stock (Eq)
deriving stock (Eq, Show)

data IgnoreModeState
= IMSLink IgnoreLinkState
| IMSParagraph
| IMSAll
deriving stock (Eq)
deriving stock (Eq, Show)

-- | Bind `IgnoreMode` to its `PosInfo` so that we can tell where the
-- corresponding annotation was declared.
data Ignore = Ignore
{ _ignoreMode :: IgnoreModeState
, _ignorePos :: Maybe PosInfo
}
} deriving stock (Show)
makeLensesFor [("_ignoreMode", "ignoreMode")] 'Ignore

data GetIgnoreMode
= NotAnAnnotation
| ValidMode IgnoreMode
| InvalidMode Text
data GetAnnotation
= IgnoreAnnotation IgnoreMode
| InvalidAnnotation Text
deriving stock (Eq)


Expand Down Expand Up @@ -167,48 +165,43 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove
-> ScannerM Node
remove pos ty subs = do
let node = Node pos ty []
scan <- use ssIgnore >>= \case
scan <- use ssIgnore >>= \e -> do
-- When no `Ignore` state is set check next node for annotation,
-- if found then set it as new `IgnoreMode` otherwise skip node.
Nothing -> handleIgnoreMode pos ty subs $ getIgnoreMode node
Just (Ignore mode modePos) ->
case (mode, ty) of
-- We expect to find a paragraph immediately after the
-- `ignore paragraph` annotanion. If the paragraph is not
-- found we should report an error.
(IMSParagraph, PARAGRAPH) -> (ssIgnore .= Nothing) $> defNode
(IMSParagraph, x) -> do
lift . tell . makeError modePos fp . ParagraphErr $ prettyType x
ssIgnore .= Nothing
Node pos ty <$> sequence subs

-- We don't expect to find an `ignore all` annotation here,
-- since that annotation should be at the top of the file and
-- the file should already be ignored when `checkIgnoreFile` is called.
-- We should report an error if we find it anyway.
(IMSAll, _) -> do
lift . tell $ makeError modePos fp FileErr
ssIgnore .= Nothing
Node pos ty <$> sequence subs

(IMSLink _, LINK {}) -> do
ssIgnore .= Nothing
return defNode
(IMSLink _, IMAGE {}) -> do
ssIgnore .= Nothing
return defNode
(IMSLink ignoreLinkState, _) -> do
when (ignoreLinkState == ExpectingLinkInSubnodes) $
ssIgnore . _Just . ignoreMode .= IMSLink ParentExpectsLink
node' <- Node pos ty <$> sequence subs
when (ignoreLinkState == ExpectingLinkInSubnodes) $ do
currentIgnore <- use ssIgnore
case currentIgnore of
Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do
lift $ tell $ makeError modePos fp LinkErr
let mbAnnotation = getAnnotation node
case mbAnnotation of
Just ann -> handleAnnotation pos ty ann
Nothing -> case e of
Nothing -> Node pos ty <$> sequence subs
Just (Ignore mode modePos) ->
case (mode, ty) of
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extra indentation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 Where is it? It seems it's not present in the final version

-- We expect to find a paragraph immediately after the
-- `ignore paragraph` annotanion. If the paragraph is not
-- found we should report an error.
(IMSParagraph, PARAGRAPH) -> (ssIgnore .= Nothing) $> defNode
(IMSParagraph, x) -> do
lift . tell . makeError modePos fp . ParagraphErr $ prettyType x
ssIgnore .= Nothing
Node pos ty <$> sequence subs

(IMSLink _, LINK {}) -> do
ssIgnore .= Nothing
return defNode
(IMSLink _, IMAGE {}) -> do
ssIgnore .= Nothing
_ -> pass
return node'
return defNode
(IMSLink ignoreLinkState, _) -> do
when (ignoreLinkState == ExpectingLinkInSubnodes) $
ssIgnore . _Just . ignoreMode .= IMSLink ParentExpectsLink
node' <- Node pos ty <$> sequence subs
when (ignoreLinkState == ExpectingLinkInSubnodes) $ do
currentIgnore <- use ssIgnore
case currentIgnore of
Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do
lift $ tell $ makeError modePos fp LinkErr
ssIgnore .= Nothing
_ -> pass
return node'

when (ty == PARAGRAPH) $ use ssIgnore >>= \case
Just (Ignore (IMSLink ExpectingLinkInParagraph) pragmaPos) ->
Expand All @@ -217,28 +210,47 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove

return scan

handleIgnoreMode
handleAnnotation
:: Maybe PosInfo
-> NodeType
-> [ScannerM Node]
-> GetIgnoreMode
-> GetAnnotation
-> ScannerM Node
handleIgnoreMode pos nodeType subs = \case
ValidMode mode -> do
ignoreModeState <- case mode of
IMLink -> use ssParentNodeType <&> IMSLink . \case
Just PARAGRAPH -> ExpectingLinkInParagraph
_ -> ExpectingLinkInSubnodes

IMParagraph -> pure IMSParagraph

IMAll -> pure IMSAll

(ssIgnore .= Just (Ignore ignoreModeState correctPos)) $> defNode
InvalidMode msg -> do
handleAnnotation pos nodeType = \case
IgnoreAnnotation mode -> do
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extra space after mode?

let reportIfThereWasAnnotation :: ScannerM ()
reportIfThereWasAnnotation = do
curIgnore <- use ssIgnore
whenJust curIgnore $ \case
Ignore IMSParagraph prevPos ->
lift . tell . makeError prevPos fp . ParagraphErr $ prettyType nodeType
Ignore (IMSLink _) prevPos ->
lift $ tell $ makeError prevPos fp LinkErr
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here too, the commit says that you apply a refactoring, but here in fact you add a behaviour that was not present before.

I more than agree that this check is good to add, but let's extract it to a separate commit.

Copy link
Contributor Author

@YuriRomanowski YuriRomanowski Dec 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact the behavior hasn't changed here, because in the original version some of these checks were present in other cases on the top level of remove function, and currently all of them are here.


mbIgnoreModeState <- case mode of
IMLink -> do
reportIfThereWasAnnotation
use ssParentNodeType <&> Just . IMSLink . \case
Just PARAGRAPH -> ExpectingLinkInParagraph
_ -> ExpectingLinkInSubnodes

IMParagraph -> do
reportIfThereWasAnnotation
pure $ Just IMSParagraph

-- We don't expect to find an `ignore all` annotation here,
-- since that annotation should be at the top of the file and
-- the file should already be ignored when `checkIgnoreFile` is called.
-- We should report an error if we find it anyway.
IMAll -> do
lift . tell $ makeError correctPos fp FileErr
pure Nothing

whenJust mbIgnoreModeState $ \ignoreModeState ->
(ssIgnore .= Just (Ignore ignoreModeState correctPos))
pure defNode
InvalidAnnotation msg -> do
lift . tell $ makeError correctPos fp $ UnrecognisedErr msg
(ssIgnore .= Nothing) $> defNode
NotAnAnnotation -> Node pos nodeType <$> sequence subs
pure defNode
where
correctPos = getPosition $ Node pos nodeType []

Expand All @@ -261,9 +273,6 @@ removeIgnored fp = withIgnoreMode . cataNodeWithParentNodeInfo remove
IMSLink _ -> do
tell $ makeError pos fp LinkErr
pure node
IMSAll -> do
tell $ makeError pos fp FileErr
pure node
(node, _) -> pure node

-- | Custom `foldMap` for source tree.
Expand Down Expand Up @@ -354,7 +363,7 @@ checkIgnoreAllFile nodes =
isComment = isJust . getCommentContent

isIgnoreFile :: Node -> Bool
isIgnoreFile = (ValidMode IMAll ==) . getIgnoreMode
isIgnoreFile = (Just (IgnoreAnnotation IMAll) ==) . getAnnotation
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can have a function like

isGlobalAnnotation :: GetAnnotation -> Bool

and use it here. Also, if this function has no wildcard pattern matchings, the compiler will ask us if we want any new annotation that we add in the future to be global or not.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently there can be only one type of annotation, which is IgnoreFile. Here similar function is introduced, called isHeaderNode.


defNode :: Node
defNode = Node Nothing DOCUMENT [] -- hard-coded default Node
Expand Down Expand Up @@ -395,16 +404,23 @@ getPosition node@(Node pos _ _) = do
pure $ PosInfo sl sc sl (sc + annLength - 1)

-- | Extract `IgnoreMode` if current node is xrefcheck annotation.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that this comment should be updated:

IgnoreMode -> GetAnnotation

getIgnoreMode :: Node -> GetIgnoreMode
getIgnoreMode node = maybe NotAnAnnotation (textToMode . words) (getXrefcheckContent node)

textToMode :: [Text] -> GetIgnoreMode
textToMode ("ignore" : [x])
| x == "link" = ValidMode IMLink
| x == "paragraph" = ValidMode IMParagraph
| x == "all" = ValidMode IMAll
| otherwise = InvalidMode x
textToMode _ = NotAnAnnotation
getAnnotation :: Node -> Maybe GetAnnotation
getAnnotation node = getXrefcheckContent node <&> textToMode

textToMode :: Text -> GetAnnotation
textToMode annText = case wordsList of
("ignore" : [x])
| Just ignMode <- getIgnoreMode x -> IgnoreAnnotation ignMode
_ -> InvalidAnnotation annText
where
wordsList = words annText

getIgnoreMode :: Text -> Maybe IgnoreMode
getIgnoreMode = \case
"link" -> Just IMLink
"paragraph" -> Just IMParagraph
"all" -> Just IMAll
_ -> Nothing

parseFileInfo :: MarkdownConfig -> FilePath -> LT.Text -> (FileInfo, [ScanError])
parseFileInfo config fp input
Expand Down
2 changes: 1 addition & 1 deletion tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ test_ignoreAnnotations =
, testCase "Check if broken unrecognised annotation produce error" do
let file = "tests/markdowns/with-annotations/unrecognised_option.md"
errs <- getErrs file
errs @?= makeError (Just $ PosInfo 7 1 7 46) file (UnrecognisedErr "unrecognised-option")
errs @?= makeError (Just $ PosInfo 7 1 7 46) file (UnrecognisedErr "ignore unrecognised-option")
]
, testGroup "\"ignore link\" mode"
[ testCase "Check \"ignore link\" performance" $ do
Expand Down
2 changes: 1 addition & 1 deletion tests/golden/check-scan-errors/expected.gold
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
➥ In file check-scan-errors.md
scan error at src:21:1-50:

Unrecognised option "unrecognised-annotation" perhaps you meant <"ignore link"|"ignore paragraph"|"ignore all">
Unrecognised option "ignore unrecognised-annotation" perhaps you meant <"ignore link"|"ignore paragraph"|"ignore all">
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a good change.

Mm, however, in the commit description you explicitly tell that you refactor things, which assumes no changes in the application logic.

Please leave a note in the commit descrption that your refactoring led to this minor change.

Copy link
Contributor Author

@YuriRomanowski YuriRomanowski Dec 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, got it, I'll change the description later when prettying commit history.


➥ In file check-second-file.md
scan error at src:9:1-29:
Expand Down