Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Fix panics parsing regex with whitespace in extended mode
The added tests fail without the fix like this:

    ---- parser::tests::ignore_space_escape_hex2 stdout ----
    	thread 'parser::tests::ignore_space_escape_hex2' panicked at 'called `Result::unwrap()` on an `Err` value: Error { pos: 10, surround: "x 5 3", kind: InvalidBase16(" 5 3") }', src/libcore/result.rs:860

    ---- parser::tests::ignore_space_escape_hex stdout ----
    	thread 'parser::tests::ignore_space_escape_hex' panicked at 'called `Result::unwrap()` on an `Err` value: Error { pos: 12, surround: "{ 5 3 }", kind: InvalidBase16(" 5 3") }', src/libcore/result.rs:860

    ---- parser::tests::ignore_space_ascii_classes stdout ----
    	thread 'parser::tests::ignore_space_ascii_classes' panicked at 'called `Result::unwrap()` on an `Err` value: Error { pos: 5, surround: "(?x)[ [ : ", kind: UnsupportedClassChar('[') }', src/libcore/result.rs:860
    note: Run with `RUST_BACKTRACE=1` for a backtrace.

    ---- parser::tests::ignore_space_escape_octal stdout ----
    	thread 'parser::tests::ignore_space_escape_octal' panicked at 'valid octal number', src/libcore/option.rs:785

    ---- parser::tests::ignore_space_escape_unicode_name stdout ----
    	thread 'parser::tests::ignore_space_escape_unicode_name' panicked at 'called `Result::unwrap()` on an `Err` value: Error { pos: 15, surround: "Y i }", kind: UnrecognizedUnicodeClass(" Y i") }', src/libcore/result.rs:860

    ---- parser::tests::ignore_space_repeat_counted stdout ----
    	thread 'parser::tests::ignore_space_repeat_counted' panicked at 'called `Result::unwrap()` on an `Err` value: Error { pos: 15, surround: ", 1 0 }", kind: InvalidBase10("1 0") }', src/libcore/result.rs:860

The panics happened because `bump_get` skipped whitespace while walking the
characters, but then kept that whitespace in the returned String.

Found using cargo-fuzz.
  • Loading branch information
robinst committed Mar 20, 2017
commit bbf4e159dc745fed177f6163537490cf384dc317
59 changes: 54 additions & 5 deletions regex-syntax/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ impl Parser {
//
// Start: `1`
// End: `,` (where `until == ','`)
fn parse_decimal<B: Bumpable>(&mut self, until: B) -> Result<u32> {
fn parse_decimal<F: FnMut(char) -> bool>(&mut self, until: F) -> Result<u32> {
match self.bump_get(until) {
// e.g., a{}
None => Err(self.err(ErrorKind::MissingBase10)),
Expand Down Expand Up @@ -809,14 +809,23 @@ impl Parser {

fn eof(&self) -> bool { self.chars().next().is_none() }

fn bump_get<B: Bumpable>(&mut self, s: B) -> Option<String> {
let n = s.match_end(self);
fn bump_get<F: FnMut(char) -> bool>(&mut self, mut f: F) -> Option<String> {
let mut s = String::new();
let n = {
let bumpable = |c| {
if f(c) {
s.push(c);
true
} else {
false
}
};
bumpable.match_end(self)
};
if n == 0 {
None
} else {
let end = checkadd(self.chari, n);
let s = self.chars[self.chari..end]
.iter().cloned().collect::<String>();
self.chari = end;
Some(s)
}
Expand Down Expand Up @@ -2374,6 +2383,46 @@ mod tests {
D"), Expr::Class(class(PERLD).negate()));
}

// Regression test: in extended mode (`(?x)`), whitespace inside a Unicode
// class escape such as `\ p { Y i }` must be ignored, so the pattern parses
// to the same class as `class(YI)`.
// Without the fix this failed with `UnrecognizedUnicodeClass(" Y i")`.
#[test]
fn ignore_space_escape_unicode_name() {
assert_eq!(p(r"(?x)\ p { Y i }"), Expr::Class(class(YI)));
}

#[test]
fn ignore_space_escape_octal() {
assert_eq!(p(r"(?x)\ 1 2 3"), lit('S'));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems a bit odd that whitespace is allowed between the digits of a number, but this appears to be the closest match to the current behavior.

assert_eq!(p(r"(?x)\
1 2 3"), lit('S'));
}

// Regression test: in extended mode (`(?x)`), whitespace inside a braced hex
// escape must be ignored, so `\x { 5 3 }` parses to the literal 'S' (0x53).
// Without the fix this failed with `InvalidBase16(" 5 3")`.
#[test]
fn ignore_space_escape_hex() {
assert_eq!(p(r"(?x)\x { 5 3 }"), lit('S'));
// Same pattern, but with a line break between `\x` and the opening brace.
assert_eq!(p(r"(?x)\x
{ 5 3 }"), lit('S'));
}

// Regression test: in extended mode (`(?x)`), whitespace inside an unbraced
// two-digit hex escape must be ignored, so `\x 5 3` parses to the literal
// 'S' (0x53).
// Without the fix this failed with `InvalidBase16(" 5 3")`.
#[test]
fn ignore_space_escape_hex2() {
assert_eq!(p(r"(?x)\x 5 3"), lit('S'));
// Same pattern, but with a line break between `\x` and the digits.
assert_eq!(p(r"(?x)\x
5 3"), lit('S'));
}

// Regression test: in extended mode (`(?x)`), whitespace inside a counted
// repetition must be ignored, so `a { 5 , 1 0 }` parses to a greedy repeat of
// `a` with min 5 and max 10.
// Without the fix this failed with `InvalidBase10("1 0")`.
#[test]
fn ignore_space_repeat_counted() {
assert_eq!(p("(?x)a { 5 , 1 0 }"), Expr::Repeat {
e: b(lit('a')),
r: Repeater::Range { min: 5, max: Some(10) },
greedy: true,
});
}

// Regression test: in extended mode (`(?x)`), whitespace inside an ASCII
// class name must be ignored, so `[ [ : u p p e r : ] ]` parses to the same
// class as `class(UPPER)`.
// Without the fix this failed with `UnsupportedClassChar('[')`.
#[test]
fn ignore_space_ascii_classes() {
assert_eq!(p("(?x)[ [ : u p p e r : ] ]"), Expr::Class(class(UPPER)));
}

#[test]
fn ignore_space_comments() {
assert_eq!(p(r"(?x)(?P<foo>
Expand Down