Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Update ending whitespace to include parentheses [no ci]
  • Loading branch information
tgross35 committed Dec 19, 2022
commit 64bee3d7d697547d7c067e98631493c73a790f89
41 changes: 24 additions & 17 deletions compiler/rustc_errors/src/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use std::sync::LazyLock;
use termcolor::{Buffer, BufferWriter, Color, ColorChoice, ColorSpec, WriteColor};

const NEWLINE_CHARS: &[u8; 2] = b"\r\n";
const PUNCT_CHARS: &[u8; 8] = br#".,"'\;:?"#;
const BREAK_CHARS: &[u8; 10] = br#".,"'\;:?()"#;

/// Representation of how to match various markdown types
const PATTERNS: [MdPattern; 10] = [
Expand All @@ -23,9 +23,9 @@ const PATTERNS: [MdPattern; 10] = [
MdPattern::new(Anchor::Sol("## "), Anchor::Eol(""), MdType::Heading2),
MdPattern::new(Anchor::Sol("### "), Anchor::Eol(""), MdType::Heading3),
MdPattern::new(Anchor::Sol("#### "), Anchor::Eol(""), MdType::Heading4),
MdPattern::new(Anchor::LeadWs("`"), Anchor::TrailWs("`"), MdType::CodeInline),
MdPattern::new(Anchor::LeadWs("**"), Anchor::TrailWs("**"), MdType::Strong),
MdPattern::new(Anchor::LeadWs("_"), Anchor::TrailWs("_"), MdType::Emphasis),
MdPattern::new(Anchor::LeadBreak("`"), Anchor::TrailBreak("`"), MdType::CodeInline),
MdPattern::new(Anchor::LeadBreak("**"), Anchor::TrailBreak("**"), MdType::Strong),
MdPattern::new(Anchor::LeadBreak("_"), Anchor::TrailBreak("_"), MdType::Emphasis),
MdPattern::new(Anchor::Sol("-"), Anchor::Eol(""), MdType::ListItem),
// MdPattern::new(Anchor::Any("\n\n"),Anchor::Any(""))
// strikethrough
Expand Down Expand Up @@ -223,10 +223,10 @@ enum Anchor {
Sol(&'static str),
/// End of line
Eol(&'static str),
/// Preceded by whitespace
LeadWs(&'static str),
/// Precedes whitespace OR punctuation
TrailWs(&'static str),
/// Preceded by whitespace or punctuation
LeadBreak(&'static str),
/// Precedes whitespace or punctuation
TrailBreak(&'static str),
/// Plain pattern matching
Any(&'static str),
}
Expand All @@ -235,7 +235,11 @@ impl Anchor {
/// Get any inner value
const fn unwrap(&self) -> &str {
match self {
Self::Sol(s) | Self::Eol(s) | Self::LeadWs(s) | Self::TrailWs(s) | Self::Any(s) => s,
Self::Sol(s)
| Self::Eol(s)
| Self::LeadBreak(s)
| Self::TrailBreak(s)
| Self::Any(s) => s,
}
}
}
Expand All @@ -244,7 +248,7 @@ impl Anchor {
#[derive(Debug, PartialEq, Clone)]
struct Context {
at_line_start: bool,
preceded_by_ws: bool,
preceded_by_break: bool,
}

/// A simple markdown type
Expand Down Expand Up @@ -295,13 +299,11 @@ impl MdPattern {

// Validate postconditions if we have a remaining string
let is_matched = match anchor {
Anchor::TrailWs(_) => {
next_byte.is_ascii_whitespace() | PUNCT_CHARS.contains(next_byte)
}
Anchor::TrailBreak(_) => is_break_char(*next_byte),
Anchor::Eol(_) => NEWLINE_CHARS.contains(next_byte),
Anchor::Sol(_) => at_line_start,
Anchor::Any(_) => true,
Anchor::LeadWs(_) => panic!("unexpected end pattern"),
Anchor::LeadBreak(_) => panic!("unexpected end pattern"),
};

if is_matched {
Expand All @@ -324,7 +326,7 @@ impl MdPattern {
if !ctx.at_line_start && matches!(self.start, Anchor::Sol(_)) {
return None;
}
if !ctx.preceded_by_ws && matches!(self.start, Anchor::LeadWs(_)) {
if !ctx.preceded_by_break && matches!(self.start, Anchor::LeadBreak(_)) {
return None;
}

Expand Down Expand Up @@ -369,7 +371,7 @@ fn recurse_tree<'a>(tree: MdTree<'a>) -> Vec<MdTree<'a>> {
/// Main parser function for a single string
fn parse_str<'a>(s: &'a str) -> Vec<MdTree<'a>> {
let mut v: Vec<MdTree<'_>> = Vec::new();
let mut ctx = Context { at_line_start: true, preceded_by_ws: true };
let mut ctx = Context { at_line_start: true, preceded_by_break: true };
let mut next_ctx = ctx.clone();
let mut working = s.as_bytes();
let mut i = 0;
Expand All @@ -380,7 +382,7 @@ fn parse_str<'a>(s: &'a str) -> Vec<MdTree<'a>> {

ctx = next_ctx.clone();
next_ctx.at_line_start = NEWLINE_CHARS.contains(current_char);
next_ctx.preceded_by_ws = current_char.is_ascii_whitespace();
next_ctx.preceded_by_break = is_break_char(*current_char);

let found = PATTERNS.iter().find_map(|p| p.parse_start(&working[i..], &ctx));

Expand All @@ -404,6 +406,11 @@ fn parse_str<'a>(s: &'a str) -> Vec<MdTree<'a>> {
v
}

/// Returns `true` when `c` is ASCII whitespace or one of the punctuation
/// bytes listed in `BREAK_CHARS`; returns `false` for every other byte.
fn is_break_char(c: u8) -> bool {
    // Whitespace is checked first; the punctuation table is only scanned
    // when that check fails.
    if c.is_ascii_whitespace() {
        return true;
    }
    BREAK_CHARS.iter().any(|&candidate| candidate == c)
}

#[must_use]
pub fn create_ast<'a>(s: &'a str) -> MdTree<'a> {
MdTree::Root(parse_str(s))
Expand Down
20 changes: 11 additions & 9 deletions compiler/rustc_errors/src/markdown/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use super::*;
fn test_comment() {
const TAG: MdType = MdType::Comment;
let pat = PATTERNS.iter().find(|p| p.tag == TAG).unwrap();
let ctx = Context { at_line_start: true, preceded_by_ws: true };
let ctx = Context { at_line_start: true, preceded_by_break: true };

let input = b"none<!--none-->residual";
assert_eq!(
Expand All @@ -22,7 +22,7 @@ fn test_comment() {
fn test_code_block() {
const TAG: MdType = MdType::CodeBlock;
let pat = PATTERNS.iter().find(|p| p.tag == TAG).unwrap();
let ctx = Context { at_line_start: true, preceded_by_ws: true };
let ctx = Context { at_line_start: true, preceded_by_break: true };

let input = b"none\n```\nblock\n```";
let end_expected =
Expand All @@ -35,15 +35,15 @@ fn test_code_block() {
MdResult { matched: MdTree::from_type("\nblock\nof\ncode\n", TAG), residual: b"residual" };
assert_eq!(pat.parse_start(input, &ctx), Some(expected));

let ctx = Context { at_line_start: false, preceded_by_ws: true };
let ctx = Context { at_line_start: false, preceded_by_break: true };
assert_eq!(pat.parse_start(input, &ctx), None);
}

#[test]
fn test_headings() {
const TAG: MdType = MdType::Heading1;
let pat = PATTERNS.iter().find(|p| p.tag == TAG).unwrap();
let ctx = Context { at_line_start: true, preceded_by_ws: true };
let ctx = Context { at_line_start: true, preceded_by_break: true };

let input = b"content\nresidual";
let end_expected = MdResult {
Expand All @@ -58,15 +58,15 @@ fn test_headings() {
let expected = MdResult { matched: MdTree::from_type("content", TAG), residual: b"\nresidual" };
assert_eq!(pat.parse_start(input, &ctx), Some(expected));

let ctx = Context { at_line_start: false, preceded_by_ws: true };
let ctx = Context { at_line_start: false, preceded_by_break: true };
assert_eq!(pat.parse_start(input, &ctx), None);
}

#[test]
fn test_code_inline() {
const TAG: MdType = MdType::CodeInline;
let pat = PATTERNS.iter().find(|p| p.tag == TAG).unwrap();
let ctx = Context { at_line_start: false, preceded_by_ws: true };
let ctx = Context { at_line_start: false, preceded_by_break: true };

let input = b"none `block` residual";
let end_expected = MdResult {
Expand All @@ -81,7 +81,7 @@ fn test_code_inline() {
let expected = MdResult { matched: MdTree::from_type("block", TAG), residual: b" residual" };
assert_eq!(pat.parse_start(input, &ctx), Some(expected));

let ctx = Context { at_line_start: false, preceded_by_ws: false };
let ctx = Context { at_line_start: false, preceded_by_break: false };
assert_eq!(pat.parse_start(input, &ctx), None);
}

Expand All @@ -96,7 +96,7 @@ more code;

<!-- I should disappear -->
Further `inline`, some **bold**, a bit of _italics
wrapped across lines_.
wrapped across lines_. We can also try (`code inside parentheses`).

Let's end with a list:

Expand Down Expand Up @@ -131,7 +131,9 @@ fn expected_ast() -> MdTree<'static> {
MdTree::Strong("bold"),
MdTree::PlainText(", a bit of "),
MdTree::Emphasis("italics\nwrapped across lines"),
MdTree::PlainText(".\n\nLet's end with a list:\n\n"),
MdTree::PlainText(". We can also try ("),
MdTree::CodeInline("code inside parentheses"),
MdTree::PlainText(").\n\nLet's end with a list:\n\n"),
MdTree::ListItem(vec![
MdTree::PlainText(" Item 1 "),
MdTree::Emphasis("italics"),
Expand Down