diff --git a/regex-syntax/src/literals.rs b/regex-syntax/src/literals.rs index 6b328a2360..db40525306 100644 --- a/regex-syntax/src/literals.rs +++ b/regex-syntax/src/literals.rs @@ -216,14 +216,17 @@ impl Literals { if self.lits.is_empty() { return self.to_empty(); } + let mut old: Vec<_> = self.lits.iter().cloned().collect(); let mut new = self.to_empty(); 'OUTER: - for lit1 in &self.lits { + while let Some(mut candidate) = old.pop() { + if candidate.is_empty() { + continue; + } if new.lits.is_empty() { - new.lits.push(lit1.clone()); + new.lits.push(candidate); continue; } - let mut candidate = lit1.clone(); for lit2 in &mut new.lits { if lit2.is_empty() { continue; @@ -236,11 +239,14 @@ impl Literals { lit2.cut = candidate.cut; continue 'OUTER; } - if candidate.len() <= lit2.len() { + if candidate.len() < lit2.len() { if let Some(i) = position(&candidate, &lit2) { - lit2.truncate(i); - lit2.cut(); candidate.cut(); + let mut lit3 = lit2.clone(); + lit3.truncate(i); + lit3.cut(); + old.push(lit3); + lit2.clear(); } } else { if let Some(i) = position(&lit2, &candidate) { @@ -1381,6 +1387,9 @@ mod tests { test_unamb!(unambiguous11, vec![M("zazb"), M("azb")], vec![C("azb"), C("z")]); test_unamb!(unambiguous12, vec![M("foo"), C("foo")], vec![C("foo")]); + test_unamb!(unambiguous13, + vec![M("ABCX"), M("CDAX"), M("BCX")], + vec![C("A"), C("BCX"), C("CD")]); // ************************************************************************ // Tests for suffix trimming. 
diff --git a/tests/regression.rs b/tests/regression.rs index 68204d9f7c..fb85507561 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -68,3 +68,6 @@ mat!(ascii_boundary_capture, u!(r"(?-u)(\B)"), "\u{28f3e}", Some((0, 0))); // See: https://github.com/rust-lang-nursery/regex/issues/280 ismatch!(partial_anchor_alternate_begin, u!(r"^a|z"), "yyyyya", false); ismatch!(partial_anchor_alternate_end, u!(r"a$|z"), "ayyyyy", false); + +// See: https://github.com/rust-lang-nursery/regex/issues/289 +mat!(lits_unambiguous, u!(r"(ABC|CDA|BC)X"), "CDAX", Some((0, 4)));