From 73d0b999b86f27f32f891031ada83e7e376bc3bf Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Wed, 22 Aug 2018 14:49:03 +0200 Subject: [PATCH 01/10] Add RegexSet benchmarks --- bench/src/bench.rs | 54 ++++++++++++++++++++++++++++++++++++++++++++-- bench/src/misc.rs | 20 ++++++++++++++++- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/bench/src/bench.rs b/bench/src/bench.rs index 6cb56db850..8c2d082201 100644 --- a/bench/src/bench.rs +++ b/bench/src/bench.rs @@ -44,9 +44,9 @@ pub use ffi::re2::Regex; #[cfg(feature = "re-dphobos")] pub use ffi::d_phobos::Regex; #[cfg(feature = "re-rust")] -pub use regex::Regex; +pub use regex::{Regex, RegexSet}; #[cfg(feature = "re-rust-bytes")] -pub use regex::bytes::Regex; +pub use regex::bytes::{Regex, RegexSet}; #[cfg(feature = "re-tcl")] pub use ffi::tcl::Regex; @@ -272,6 +272,56 @@ macro_rules! bench_captures { } } +macro_rules! bench_is_match_set { + ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { + #[bench] + fn $name(b: &mut Bencher) { + use std::sync::Mutex; + lazy_static! { + static ref RE: Mutex = Mutex::new($re); + static ref TEXT: Mutex = Mutex::new(text!($haystack)); + }; + let re = RE.lock().unwrap(); + let text = TEXT.lock().unwrap(); + b.bytes = text.len() as u64; + b.iter(|| { + if re.is_match(&text) != $is_match { + if $is_match { + panic!("expected match, got not match"); + } else { + panic!("expected no match, got match"); + } + } + }); + } + } +} + +macro_rules! bench_matches_set { + ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { + #[bench] + fn $name(b: &mut Bencher) { + use std::sync::Mutex; + lazy_static! { + static ref RE: Mutex = Mutex::new($re); + static ref TEXT: Mutex = Mutex::new(text!($haystack)); + }; + let re = RE.lock().unwrap(); + let text = TEXT.lock().unwrap(); + b.bytes = text.len() as u64; + b.iter(|| { + if re.matches(&text).matched_any() != $is_match { + if $is_match { + panic!("expected match, got not match"); + } else { + panic!("expected no match, got match"); + } + } + }); + } + } +} + mod ffi; mod misc; mod regexdna; diff --git a/bench/src/misc.rs b/bench/src/misc.rs index ad516e23e4..69ea21f842 100644 --- a/bench/src/misc.rs +++ b/bench/src/misc.rs @@ -14,7 +14,7 @@ use std::iter::repeat; use test::Bencher; -use {Regex, Text}; +use {Regex, RegexSet, Text}; #[cfg(not(feature = "re-onig"))] #[cfg(not(feature = "re-pcre1"))] @@ -278,3 +278,21 @@ bench_captures!(short_haystack_1000000x, repeat("aaaa").take(1000000).collect::(), repeat("dddd").take(1000000).collect::(), )); + +#[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))] +bench_is_match_set!(is_match_set, + true, + RegexSet::new(vec!["aaaaaaaaaaaaaaaaaaa", "abc579", "def.+", "e24fg", "a.*2c", "23."]).unwrap(), + format!("{}a482c{}", + repeat('a').take(10).collect::(), + repeat('b').take(10).collect::()) + ); + +#[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))] +bench_matches_set!(matches_set, + true, + RegexSet::new(vec!["aaaaaaaaaaaaaaaaaaa", "abc579", "def.+", "e24fg", "a.*2c", "23."]).unwrap(), + format!("{}a482c{}", + repeat('a').take(10).collect::(), + repeat('b').take(10).collect::()) + ); From 939111439d03d8cedeb7905ff132bac73765b5f3 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Thu, 23 Aug 2018 09:04:52 +0200 Subject: [PATCH 02/10] refactor: Simplify SparseSet slightly --- src/sparse.rs | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/src/sparse.rs b/src/sparse.rs index 34c05e7157..0629dea793 100644 --- a/src/sparse.rs +++ b/src/sparse.rs @@ -14,52 +14,49 @@ use std::slice; #[derive(Clone, Debug)] pub struct SparseSet { /// Dense contains the instruction pointers in the order in which they - /// were inserted. Accessing elements >= self.size is illegal. + /// were inserted. dense: Vec, /// Sparse maps instruction pointers to their location in dense. /// /// An instruction pointer is in the set if and only if - /// sparse[ip] < size && ip == dense[sparse[ip]]. - sparse: Vec, - /// The number of elements in the set. - size: usize, + /// sparse[ip] < dense.len() && ip == dense[sparse[ip]]. + sparse: Box<[usize]>, } impl SparseSet { pub fn new(size: usize) -> SparseSet { SparseSet { - dense: vec![0; size], - sparse: vec![0; size], - size: 0, + dense: Vec::with_capacity(size), + sparse: vec![0; size].into_boxed_slice(), } } pub fn len(&self) -> usize { - self.size + self.dense.len() } pub fn is_empty(&self) -> bool { - self.size == 0 + self.dense.is_empty() } pub fn capacity(&self) -> usize { - self.dense.len() + self.dense.capacity() } pub fn insert(&mut self, value: usize) { - let i = self.size; - self.dense[i] = value; + let i = self.len(); + assert!(i < self.capacity()); + self.dense.push(value); self.sparse[value] = i; - self.size += 1; } pub fn contains(&self, value: usize) -> bool { let i = self.sparse[value]; - i < self.size && self.dense[i] == value + self.dense.get(i) == Some(&value) } pub fn clear(&mut self) { - self.size = 0; + self.dense.clear(); } } @@ -67,7 +64,7 @@ impl Deref for SparseSet { type Target = [usize]; fn deref(&self) -> &Self::Target { - &self.dense[0..self.size] + &self.dense } } From 1a0f55cfba4a9be0f9c90db239269fe66612b743 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Thu, 23 Aug 2018 09:07:23 +0200 Subject: [PATCH 03/10] perf: Always inline Program's Deref Shaves of a few percent on dfa heavy regexes such as `RegexSet` use. --- src/prog.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/prog.rs b/src/prog.rs index 198e1f45c4..b62802f2b7 100644 --- a/src/prog.rs +++ b/src/prog.rs @@ -161,6 +161,7 @@ impl Program { impl Deref for Program { type Target = [Inst]; + #[inline(always)] fn deref(&self) -> &Self::Target { &*self.insts } From 5209bf10815346b496117fff57a8e782d5ef760c Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Fri, 24 Aug 2018 14:54:04 +0200 Subject: [PATCH 04/10] perf: Cache the Vec used to build state keys Avoids frequent re-allocations while the instructions are pushed to it. --- src/dfa.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/dfa.rs b/src/dfa.rs index 992a45cc62..7d77e38283 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -155,6 +155,8 @@ struct CacheInner { /// The total heap size of the DFA's cache. We use this to determine when /// we should flush the cache. size: usize, + + insts_scratch_space: Vec, } /// The transition table. @@ -435,6 +437,7 @@ impl Cache { stack: vec![], flush_count: 0, size: 0, + insts_scratch_space: vec![], }, qcur: SparseSet::new(prog.insts.len()), qnext: SparseSet::new(prog.insts.len()), @@ -1220,7 +1223,10 @@ impl<'a> Fsm<'a> { // cache. // Reserve 1 byte for flags. - let mut insts = vec![0]; + let mut insts = mem::replace(&mut self.cache.insts_scratch_space, Vec::new()); + insts.clear(); + insts.push(0); + let mut prev = 0; for &ip in q { let ip = usize_to_u32(ip); @@ -1244,13 +1250,15 @@ impl<'a> Fsm<'a> { // see a match when expanding NFA states previously, then this is a // dead state and no amount of additional input can transition out // of this state. - if insts.len() == 1 && !state_flags.is_match() { + let opt_state = if insts.len() == 1 && !state_flags.is_match() { None } else { let StateFlags(f) = *state_flags; insts[0] = f; - Some(State { data: insts.into_boxed_slice() }) - } + Some(State { data: insts.clone().into_boxed_slice() }) + }; + self.cache.insts_scratch_space = insts; + opt_state } /// Clears the cache, but saves and restores current_state if it is not From 3e50be90fd181c8785cf89698e66b85c13620d0e Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Fri, 7 Sep 2018 18:25:37 +0200 Subject: [PATCH 05/10] perf: Avoid cloning the State data if the state exists --- src/dfa.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/dfa.rs b/src/dfa.rs index 7d77e38283..8b9ee6d46b 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -117,7 +117,7 @@ struct CacheInner { /// things, we just pass indexes around manually. The performance impact of /// this is probably an instruction or two in the inner loop. However, on /// 64 bit, each StatePtr is half the size of a *State. - compiled: HashMap, + compiled: HashMap, StatePtr>, /// The transition table. /// /// The transition table is laid out in row-major order, where states are @@ -1177,14 +1177,15 @@ impl<'a> Fsm<'a> { // in q. We use this as an optimization in exec_byte to determine when // we should follow epsilon transitions at the empty string preceding // the current byte. - let key = match self.cached_state_key(q, &mut state_flags) { + match self.cached_state_key(q, &mut state_flags) { None => return Some(STATE_DEAD), Some(v) => v, - }; + } // In the cache? Cool. Done. - if let Some(&si) = self.cache.compiled.get(&key) { + if let Some(&si) = self.cache.compiled.get(&self.cache.insts_scratch_space[..]) { return Some(si); } + // If the cache has gotten too big, wipe it. if self.approximate_size() > self.prog.dfa_size_limit && !self.clear_cache_and_save(current_state) @@ -1192,6 +1193,10 @@ impl<'a> Fsm<'a> { // Ooops. DFA is giving up. return None; } + + let key = State { + data: self.cache.insts_scratch_space.clone().into_boxed_slice() + }; // Allocate room for our state and add it. self.add_state(key) } @@ -1210,7 +1215,7 @@ impl<'a> Fsm<'a> { &mut self, q: &SparseSet, state_flags: &mut StateFlags, - ) -> Option { + ) -> Option<()> { use prog::Inst::*; // We need to build up enough information to recognize pre-built states @@ -1255,7 +1260,7 @@ impl<'a> Fsm<'a> { } else { let StateFlags(f) = *state_flags; insts[0] = f; - Some(State { data: insts.clone().into_boxed_slice() }) + Some(()) }; self.cache.insts_scratch_space = insts; opt_state @@ -1342,7 +1347,7 @@ impl<'a> Fsm<'a> { fn restore_state(&mut self, state: State) -> Option { // If we've already stored this state, just return a pointer to it. // None will be the wiser. - if let Some(&si) = self.cache.compiled.get(&state) { + if let Some(&si) = self.cache.compiled.get(&state.data) { return Some(si); } self.add_state(state) @@ -1520,7 +1525,7 @@ impl<'a> Fsm<'a> { + (2 * mem::size_of::()) + mem::size_of::(); self.cache.states.push(state.clone()); - self.cache.compiled.insert(state, si); + self.cache.compiled.insert(state.data, si); // Transition table and set of states and map should all be in sync. debug_assert!(self.cache.states.len() == self.cache.trans.num_states()); From 59b3b36b1d8290f08062e6d907d14ac049c75662 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Fri, 7 Sep 2018 17:41:21 +0200 Subject: [PATCH 06/10] Remove Clone from Cache --- src/dfa.rs | 4 ++-- src/exec.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dfa.rs b/src/dfa.rs index 8b9ee6d46b..1f381fe618 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -88,7 +88,7 @@ pub fn can_exec(insts: &Program) -> bool { /// This cache is reused between multiple invocations of the same regex /// program. (It is not shared simultaneously between threads. If there is /// contention, then new caches are created.) -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct Cache { /// Group persistent DFA related cache state together. The sparse sets /// listed below are used as scratch space while computing uncached states. @@ -107,7 +107,7 @@ pub struct Cache { /// `CacheInner` is logically just a part of Cache, but groups together fields /// that aren't passed as function parameters throughout search. (This split /// is mostly an artifact of the borrow checker. It is happily paid.) -#[derive(Clone, Debug)] +#[derive(Debug)] struct CacheInner { /// A cache of pre-compiled DFA states, keyed by the set of NFA states /// and the set of empty-width flags set at the byte in the input when the diff --git a/src/exec.rs b/src/exec.rs index 578289aa5c..52d970f801 100644 --- a/src/exec.rs +++ b/src/exec.rs @@ -1271,7 +1271,7 @@ enum MatchNfaType { /// available to a particular program. pub type ProgramCache = RefCell; -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct ProgramCacheInner { pub pikevm: pikevm::Cache, pub backtrack: backtrack::Cache, From c4c14ce62c48a2b0545fbf7b6cc43a8742ce219f Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Wed, 26 Sep 2018 13:14:08 +0200 Subject: [PATCH 07/10] refactor: Implement Borrow<[u8]> for State so it can be used as a map key --- src/dfa.rs | 213 ++++++++++++++++++++++------------------------------- 1 file changed, 89 insertions(+), 124 deletions(-) diff --git a/src/dfa.rs b/src/dfa.rs index 1f381fe618..1d9cdc51a6 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -46,6 +46,7 @@ articles on regexes is strongly recommended: https://swtch.com/~rsc/regexp/ implementation.) */ +use std::borrow::Borrow; use std::collections::HashMap; use std::fmt; use std::iter::repeat; @@ -117,7 +118,7 @@ struct CacheInner { /// things, we just pass indexes around manually. The performance impact of /// this is probably an instruction or two in the inner loop. However, on /// 64 bit, each StatePtr is half the size of a *State. - compiled: HashMap, StatePtr>, + compiled: HashMap, /// The transition table. /// /// The transition table is laid out in row-major order, where states are @@ -156,7 +157,7 @@ struct CacheInner { /// we should flush the cache. size: usize, - insts_scratch_space: Vec, + insts_scratch_space: Vec, } /// The transition table. @@ -269,10 +270,16 @@ impl Result { /// it is packed into a single byte; Otherwise the byte 128 (-128 as an i8) /// is coded as a flag, followed by 4 bytes encoding the delta. #[derive(Clone, Eq, Hash, PartialEq)] -struct State{ +struct State { data: Box<[u8]>, } +impl Borrow<[u8]> for State { + fn borrow(&self) -> &[u8] { + &self.data + } +} + /// `InstPtr` is a 32 bit pointer into a sequence of opcodes (i.e., it indexes /// an NFA state). /// @@ -294,7 +301,7 @@ struct InstPtrs<'a> { data: &'a [u8], } -impl <'a>Iterator for InstPtrs<'a> { +impl<'a> Iterator for InstPtrs<'a> { type Item = usize; fn next(&mut self) -> Option { @@ -372,7 +379,7 @@ type StatePtr = u32; /// An unknown state means that the state has not been computed yet, and that /// the only way to progress is to compute it. -const STATE_UNKNOWN: StatePtr = 1<<31; +const STATE_UNKNOWN: StatePtr = 1 << 31; /// A dead state means that the state has been computed and it is known that /// once it is entered, no future match can ever occur. @@ -386,12 +393,12 @@ const STATE_QUIT: StatePtr = STATE_DEAD + 1; /// A start state is a state that the DFA can start in. /// /// Note that start states have their lower bits set to a state pointer. -const STATE_START: StatePtr = 1<<30; +const STATE_START: StatePtr = 1 << 30; /// A match state means that the regex has successfully matched. /// /// Note that match states have their lower bits set to a state pointer. -const STATE_MATCH: StatePtr = 1<<29; +const STATE_MATCH: StatePtr = 1 << 29; /// The maximum state pointer. This is useful to mask out the "valid" state /// pointer from a state with the "start" or "match" bits set. @@ -451,8 +458,7 @@ impl CacheInner { /// Resets the cache size to account for fixed costs, such as the program /// and stack sizes. fn reset_size(&mut self) { - self.size = - (self.start_states.len() * mem::size_of::()) + self.size = (self.start_states.len() * mem::size_of::()) + (self.stack.len() * mem::size_of::()); } } @@ -478,11 +484,7 @@ impl<'a> Fsm<'a> { cache: &mut cache.inner, }; let (empty_flags, state_flags) = dfa.start_flags(text, at); - dfa.start = match dfa.start_state( - &mut cache.qcur, - empty_flags, - state_flags, - ) { + dfa.start = match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { None => return Result::Quit, Some(STATE_DEAD) => return Result::NoMatch(at), Some(si) => si, @@ -511,11 +513,7 @@ impl<'a> Fsm<'a> { cache: &mut cache.inner, }; let (empty_flags, state_flags) = dfa.start_flags_reverse(text, at); - dfa.start = match dfa.start_state( - &mut cache.qcur, - empty_flags, - state_flags, - ) { + dfa.start = match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { None => return Result::Quit, Some(STATE_DEAD) => return Result::NoMatch(at), Some(si) => si, @@ -545,11 +543,7 @@ impl<'a> Fsm<'a> { cache: &mut cache.inner, }; let (empty_flags, state_flags) = dfa.start_flags(text, at); - dfa.start = match dfa.start_state( - &mut cache.qcur, - empty_flags, - state_flags, - ) { + dfa.start = match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { None => return Result::Quit, Some(STATE_DEAD) => return Result::NoMatch(at), Some(si) => si, @@ -677,8 +671,7 @@ impl<'a> Fsm<'a> { // match states are final. Therefore, we can quit. if self.prog.matches.len() > 1 { let state = self.state(next_si); - let just_matches = state.inst_ptrs() - .all(|ip| self.prog[ip].is_match()); + let just_matches = state.inst_ptrs().all(|ip| self.prog[ip].is_match()); if just_matches { return result; } @@ -689,12 +682,9 @@ impl<'a> Fsm<'a> { // very quickly, and only recording the match location once // we've left this particular state. let cur = at; - while (next_si & !STATE_MATCH) == prev_si - && at + 2 < text.len() { + while (next_si & !STATE_MATCH) == prev_si && at + 2 < text.len() { // Argument for safety is in the definition of next_si. - next_si = unsafe { - self.next_si(next_si & !STATE_MATCH, text, at) - }; + next_si = unsafe { self.next_si(next_si & !STATE_MATCH, text, at) }; at += 1; } if at > cur { @@ -811,7 +801,7 @@ impl<'a> Fsm<'a> { next_si &= !STATE_MATCH; result = Result::Match(at + 1); if self.quit_after_match { - return result + return result; } self.last_match_si = next_si; prev_si = next_si; @@ -819,9 +809,7 @@ impl<'a> Fsm<'a> { while (next_si & !STATE_MATCH) == prev_si && at >= 2 { // Argument for safety is in the definition of next_si. at -= 1; - next_si = unsafe { - self.next_si(next_si & !STATE_MATCH, text, at) - }; + next_si = unsafe { self.next_si(next_si & !STATE_MATCH, text, at) }; } if at < cur { result = Result::Match(at + 2); @@ -988,8 +976,7 @@ impl<'a> Fsm<'a> { state_flags.set_match(); if !self.continue_past_first_match() { break; - } else if self.prog.matches.len() > 1 - && !qnext.contains(ip as usize) { + } else if self.prog.matches.len() > 1 && !qnext.contains(ip as usize) { // If we are continuing on to find other matches, // then keep a record of the match states we've seen. qnext.insert(ip); @@ -997,25 +984,23 @@ impl<'a> Fsm<'a> { } Bytes(ref inst) => { if b.as_byte().map_or(false, |b| inst.matches(b)) { - self.follow_epsilons( - inst.goto as InstPtr, qnext, empty_flags); + self.follow_epsilons(inst.goto as InstPtr, qnext, empty_flags); } } } } - let cache = - if b.is_eof() && self.prog.matches.len() > 1 { - // If we're processing the last byte of the input and we're - // matching a regex set, then make the next state contain the - // previous states transitions. We do this so that the main - // matching loop can extract all of the match instructions. - mem::swap(qcur, qnext); - // And don't cache this state because it's totally bunk. - false - } else { - true - }; + let cache = if b.is_eof() && self.prog.matches.len() > 1 { + // If we're processing the last byte of the input and we're + // matching a regex set, then make the next state contain the + // previous states transitions. We do this so that the main + // matching loop can extract all of the match instructions. + mem::swap(qcur, qnext); + // And don't cache this state because it's totally bunk. + false + } else { + true + }; // We've now built up the set of NFA states that ought to comprise the // next DFA state, so try to find it in the cache, and if it doesn't @@ -1024,11 +1009,7 @@ impl<'a> Fsm<'a> { // N.B. We pass `&mut si` here because the cache may clear itself if // it has gotten too full. When that happens, the location of the // current state may change. - let mut next = match self.cached_state( - qnext, - state_flags, - Some(&mut si), - ) { + let mut next = match self.cached_state(qnext, state_flags, Some(&mut si)) { None => return None, Some(next) => next, }; @@ -1073,14 +1054,9 @@ impl<'a> Fsm<'a> { /// line should be set if the preceding byte is `\n`. End line should never /// be set in this case. (Even if the proceding byte is a `\n`, it will /// be handled in a subsequent DFA state.) - fn follow_epsilons( - &mut self, - ip: InstPtr, - q: &mut SparseSet, - flags: EmptyFlags, - ) { - use prog::Inst::*; + fn follow_epsilons(&mut self, ip: InstPtr, q: &mut SparseSet, flags: EmptyFlags) { use prog::EmptyLook::*; + use prog::Inst::*; // We need to traverse the NFA to follow epsilon transitions, so avoid // recursion with an explicit stack. @@ -1127,9 +1103,8 @@ impl<'a> Fsm<'a> { NotWordBoundary if flags.not_word_boundary => { ip = inst.goto as InstPtr; } - StartLine | EndLine | StartText | EndText - | WordBoundaryAscii | NotWordBoundaryAscii - | WordBoundary | NotWordBoundary => { + StartLine | EndLine | StartText | EndText | WordBoundaryAscii + | NotWordBoundaryAscii | WordBoundary | NotWordBoundary => { break; } } @@ -1189,13 +1164,13 @@ impl<'a> Fsm<'a> { // If the cache has gotten too big, wipe it. if self.approximate_size() > self.prog.dfa_size_limit && !self.clear_cache_and_save(current_state) - { - // Ooops. DFA is giving up. - return None; - } + { + // Ooops. DFA is giving up. + return None; + } let key = State { - data: self.cache.insts_scratch_space.clone().into_boxed_slice() + data: self.cache.insts_scratch_space.clone().into_boxed_slice(), }; // Allocate room for our state and add it. self.add_state(key) @@ -1211,11 +1186,7 @@ impl<'a> Fsm<'a> { /// Specifically, q should be an ordered set of NFA states and is_match /// should be true if and only if the previous NFA states contained a match /// state. - fn cached_state_key( - &mut self, - q: &SparseSet, - state_flags: &mut StateFlags, - ) -> Option<()> { + fn cached_state_key(&mut self, q: &SparseSet, state_flags: &mut StateFlags) -> Option<()> { use prog::Inst::*; // We need to build up enough information to recognize pre-built states @@ -1274,10 +1245,7 @@ impl<'a> Fsm<'a> { /// /// This returns false if the cache is not cleared and the DFA should /// give up. - fn clear_cache_and_save( - &mut self, - current_state: Option<&mut StatePtr>, - ) -> bool { + fn clear_cache_and_save(&mut self, current_state: Option<&mut StatePtr>) -> bool { if self.cache.states.is_empty() { // Nothing to clear... return true; @@ -1311,7 +1279,8 @@ impl<'a> Fsm<'a> { let nstates = self.cache.states.len(); if self.cache.flush_count >= 3 && self.at >= self.last_cache_flush - && (self.at - self.last_cache_flush) <= 10 * nstates { + && (self.at - self.last_cache_flush) <= 10 * nstates + { return false; } // Update statistics tracking cache flushes. @@ -1347,7 +1316,7 @@ impl<'a> Fsm<'a> { fn restore_state(&mut self, state: State) -> Option { // If we've already stored this state, just return a pointer to it. // None will be the wiser. - if let Some(&si) = self.cache.compiled.get(&state.data) { + if let Some(&si) = self.cache.compiled.get(&state) { return Some(si); } self.add_state(state) @@ -1402,14 +1371,13 @@ impl<'a> Fsm<'a> { // matches are delayed by one byte, start states can never be match // states. let flagi = { - (((empty_flags.start as u8) << 0) | - ((empty_flags.end as u8) << 1) | - ((empty_flags.start_line as u8) << 2) | - ((empty_flags.end_line as u8) << 3) | - ((empty_flags.word_boundary as u8) << 4) | - ((empty_flags.not_word_boundary as u8) << 5) | - ((state_flags.is_word() as u8) << 6)) - as usize + (((empty_flags.start as u8) << 0) + | ((empty_flags.end as u8) << 1) + | ((empty_flags.start_line as u8) << 2) + | ((empty_flags.end_line as u8) << 3) + | ((empty_flags.word_boundary as u8) << 4) + | ((empty_flags.not_word_boundary as u8) << 5) + | ((state_flags.is_word() as u8) << 6)) as usize }; match self.cache.start_states[flagi] { STATE_UNKNOWN => {} @@ -1460,11 +1428,7 @@ impl<'a> Fsm<'a> { /// /// This should only be used when executing the DFA in reverse over the /// input. - fn start_flags_reverse( - &self, - text: &[u8], - at: usize, - ) -> (EmptyFlags, StateFlags) { + fn start_flags_reverse(&self, text: &[u8], at: usize) -> (EmptyFlags, StateFlags) { let mut empty_flags = EmptyFlags::default(); let mut state_flags = StateFlags::default(); empty_flags.start = at == text.len(); @@ -1472,8 +1436,7 @@ impl<'a> Fsm<'a> { empty_flags.start_line = at == text.len() || text[at] == b'\n'; empty_flags.end_line = text.is_empty(); - let is_word_last = - at < text.len() && Byte::byte(text[at]).is_ascii_word(); + let is_word_last = at < text.len() && Byte::byte(text[at]).is_ascii_word(); let is_word = at > 0 && Byte::byte(text[at - 1]).is_ascii_word(); if is_word_last { state_flags.set_word(); @@ -1519,18 +1482,15 @@ impl<'a> Fsm<'a> { } // Finally, put our actual state on to our heap of states and index it // so we can find it later. - self.cache.size += - self.cache.trans.state_heap_size() + self.cache.size += self.cache.trans.state_heap_size() + (2 * state.data.len()) + (2 * mem::size_of::()) + mem::size_of::(); self.cache.states.push(state.clone()); - self.cache.compiled.insert(state.data, si); + self.cache.compiled.insert(state, si); // Transition table and set of states and map should all be in sync. - debug_assert!(self.cache.states.len() - == self.cache.trans.num_states()); - debug_assert!(self.cache.states.len() - == self.cache.compiled.len()); + debug_assert!(self.cache.states.len() == self.cache.trans.num_states()); + debug_assert!(self.cache.states.len() == self.cache.compiled.len()); Some(si) } @@ -1583,9 +1543,7 @@ impl<'a> Fsm<'a> { /// Returns true if there is a prefix we can quickly search for. fn has_prefix(&self) -> bool { - !self.prog.is_reverse - && !self.prog.prefixes.is_empty() - && !self.prog.is_anchored_start + !self.prog.is_reverse && !self.prog.prefixes.is_empty() && !self.prog.is_anchored_start } /// Sets the STATE_START bit in the given state pointer if and only if @@ -1636,7 +1594,8 @@ impl Transitions { if si > STATE_MAX as usize { return None; } - self.table.extend(repeat(STATE_UNKNOWN).take(self.num_byte_classes)); + self.table + .extend(repeat(STATE_UNKNOWN).take(self.num_byte_classes)); Some(usize_to_u32(si)) } @@ -1695,9 +1654,15 @@ impl StateFlags { } impl Byte { - fn byte(b: u8) -> Self { Byte(b as u16) } - fn eof() -> Self { Byte(256) } - fn is_eof(&self) -> bool { self.0 == 256 } + fn byte(b: u8) -> Self { + Byte(b as u16) + } + fn eof() -> Self { + Byte(256) + } + fn is_eof(&self) -> bool { + self.0 == 256 + } fn is_ascii_word(&self) -> bool { let b = match self.as_byte() { @@ -1723,9 +1688,9 @@ impl fmt::Debug for State { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let ips: Vec = self.inst_ptrs().collect(); f.debug_struct("State") - .field("flags", &self.flags()) - .field("insts", &ips) - .finish() + .field("flags", &self.flags()) + .field("insts", &ips) + .finish() } } @@ -1764,10 +1729,10 @@ impl<'a> fmt::Debug for TransitionsRow<'a> { impl fmt::Debug for StateFlags { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("StateFlags") - .field("is_match", &self.is_match()) - .field("is_word", &self.is_word()) - .field("has_empty", &self.has_empty()) - .finish() + .field("is_match", &self.is_match()) + .field("is_word", &self.is_word()) + .field("has_empty", &self.has_empty()) + .finish() } } @@ -1857,11 +1822,10 @@ fn read_varu32(data: &[u8]) -> (u32, usize) { mod tests { extern crate rand; - use quickcheck::{QuickCheck, StdGen, quickcheck}; use super::{ - StateFlags, State, push_inst_ptr, - write_varu32, read_varu32, write_vari32, read_vari32, + push_inst_ptr, read_vari32, read_varu32, write_vari32, write_varu32, State, StateFlags, }; + use quickcheck::{quickcheck, QuickCheck, StdGen}; #[test] fn prop_state_encode_decode() { @@ -1871,10 +1835,11 @@ mod tests { for &ip in ips.iter() { push_inst_ptr(&mut data, &mut prev, ip); } - let state = State { data: data.into_boxed_slice() }; + let state = State { + data: data.into_boxed_slice(), + }; - let expected: Vec = - ips.into_iter().map(|ip| ip as usize).collect(); + let expected: Vec = ips.into_iter().map(|ip| ip as usize).collect(); let got: Vec = state.inst_ptrs().collect(); expected == got && state.flags() == StateFlags(flags) } From 7e655b7c3e699357727541add21c8fb6f5a64bf6 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Wed, 26 Sep 2018 13:59:13 +0200 Subject: [PATCH 08/10] chore: Fix unused_import in benchmarks --- bench/src/misc.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bench/src/misc.rs b/bench/src/misc.rs index 69ea21f842..2786408248 100644 --- a/bench/src/misc.rs +++ b/bench/src/misc.rs @@ -14,7 +14,9 @@ use std::iter::repeat; use test::Bencher; -use {Regex, RegexSet, Text}; +#[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))] +use RegexSet; +use {Regex, Text}; #[cfg(not(feature = "re-onig"))] #[cfg(not(feature = "re-pcre1"))] From 6882158d920aa77ce6ae733f1f365d4d656ccdd6 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Fri, 7 Sep 2018 18:36:04 +0200 Subject: [PATCH 09/10] perf: Avoid storing compiled dfa states twice --- src/dfa.rs | 105 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 21 deletions(-) diff --git a/src/dfa.rs b/src/dfa.rs index 1d9cdc51a6..5b1a750cc3 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -47,15 +47,17 @@ implementation.) */ use std::borrow::Borrow; -use std::collections::HashMap; use std::fmt; use std::iter::repeat; use std::mem; +use std::sync::Arc; use exec::ProgramCache; use prog::{Inst, Program}; use sparse::SparseSet; +use self::state_map::StateMap; + /// Return true if and only if the given program can be executed by a DFA. /// /// Generally, a DFA is always possible. A pathological case where it is not @@ -118,7 +120,7 @@ struct CacheInner { /// things, we just pass indexes around manually. The performance impact of /// this is probably an instruction or two in the inner loop. However, on /// 64 bit, each StatePtr is half the size of a *State. - compiled: HashMap, + compiled: StateMap, /// The transition table. /// /// The transition table is laid out in row-major order, where states are @@ -135,9 +137,6 @@ struct CacheInner { /// bytes that never discriminate a distinct path through the DFA from each /// other. trans: Transitions, - /// Our set of states. Note that `StatePtr / num_byte_classes` indexes - /// this Vec rather than just a `StatePtr`. - states: Vec, /// A set of cached start states, which are limited to the number of /// permutations of flags set just before the initial byte of input. (The /// index into this vec is a `EmptyFlags`.) @@ -270,8 +269,8 @@ impl Result { /// it is packed into a single byte; Otherwise the byte 128 (-128 as an i8) /// is coded as a flag, followed by 4 bytes encoding the delta. #[derive(Clone, Eq, Hash, PartialEq)] -struct State { - data: Box<[u8]>, +pub struct State { + data: Arc<[u8]>, } impl Borrow<[u8]> for State { @@ -280,6 +279,13 @@ impl Borrow<[u8]> for State { } } +impl State { + fn heap_size(&self) -> usize { + // 2 * Reference counters + 2 * mem::size_of::() + self.data.len() + } +} + /// `InstPtr` is a 32 bit pointer into a sequence of opcodes (i.e., it indexes /// an NFA state). /// @@ -437,9 +443,8 @@ impl Cache { let starts = vec![STATE_UNKNOWN; 256]; let mut cache = Cache { inner: CacheInner { - compiled: HashMap::new(), + compiled: StateMap::new(), trans: Transitions::new(num_byte_classes), - states: vec![], start_states: starts, stack: vec![], flush_count: 0, @@ -1157,7 +1162,11 @@ impl<'a> Fsm<'a> { Some(v) => v, } // In the cache? Cool. Done. - if let Some(&si) = self.cache.compiled.get(&self.cache.insts_scratch_space[..]) { + if let Some(si) = self + .cache + .compiled + .get_ptr(&self.cache.insts_scratch_space[..]) + { return Some(si); } @@ -1170,7 +1179,7 @@ impl<'a> Fsm<'a> { } let key = State { - data: self.cache.insts_scratch_space.clone().into_boxed_slice(), + data: Arc::from(&self.cache.insts_scratch_space[..]), }; // Allocate room for our state and add it. self.add_state(key) @@ -1246,7 +1255,7 @@ impl<'a> Fsm<'a> { /// This returns false if the cache is not cleared and the DFA should /// give up. fn clear_cache_and_save(&mut self, current_state: Option<&mut StatePtr>) -> bool { - if self.cache.states.is_empty() { + if self.cache.compiled.is_empty() { // Nothing to clear... return true; } @@ -1276,7 +1285,7 @@ impl<'a> Fsm<'a> { // 10 or fewer bytes per state. // Additionally, we permit the cache to be flushed a few times before // caling it quits. - let nstates = self.cache.states.len(); + let nstates = self.cache.compiled.len(); if self.cache.flush_count >= 3 && self.at >= self.last_cache_flush && (self.at - self.last_cache_flush) <= 10 * nstates @@ -1296,7 +1305,6 @@ impl<'a> Fsm<'a> { }; self.cache.reset_size(); self.cache.trans.clear(); - self.cache.states.clear(); self.cache.compiled.clear(); for s in &mut self.cache.start_states { *s = STATE_UNKNOWN; @@ -1316,7 +1324,7 @@ impl<'a> Fsm<'a> { fn restore_state(&mut self, state: State) -> Option { // If we've already stored this state, just return a pointer to it. // None will be the wiser. - if let Some(&si) = self.cache.compiled.get(&state) { + if let Some(si) = self.cache.compiled.get_ptr(&state.data) { return Some(si); } self.add_state(state) @@ -1451,7 +1459,10 @@ impl<'a> Fsm<'a> { /// Returns a reference to a State given a pointer to it. fn state(&self, si: StatePtr) -> &State { - &self.cache.states[si as usize / self.num_byte_classes()] + self.cache + .compiled + .get_state(si as usize / self.num_byte_classes()) + .unwrap() } /// Adds the given state to the DFA. @@ -1483,14 +1494,12 @@ impl<'a> Fsm<'a> { // Finally, put our actual state on to our heap of states and index it // so we can find it later. self.cache.size += self.cache.trans.state_heap_size() - + (2 * state.data.len()) + + state.heap_size() + (2 * mem::size_of::()) + mem::size_of::(); - self.cache.states.push(state.clone()); self.cache.compiled.insert(state, si); // Transition table and set of states and map should all be in sync. - debug_assert!(self.cache.states.len() == self.cache.trans.num_states()); - debug_assert!(self.cache.states.len() == self.cache.compiled.len()); + debug_assert!(self.cache.compiled.len() == self.cache.trans.num_states()); Some(si) } @@ -1818,10 +1827,64 @@ fn read_varu32(data: &[u8]) -> (u32, usize) { (0, 0) } +mod state_map { + use std::collections::HashMap; + + use super::{State, StatePtr}; + + #[derive(Debug)] + pub struct StateMap { + /// The keys are not actually static but rely on always pointing to a buffer in `states` + /// which will never be moved except when clearing the map or on drop, in which case the + /// keys of this map will be removed before + map: HashMap, + /// Our set of states. Note that `StatePtr / num_byte_classes` indexes + /// this Vec rather than just a `StatePtr`. + states: Vec, + } + + impl StateMap { + pub fn new() -> StateMap { + StateMap { + map: HashMap::new(), + states: Vec::new(), + } + } + + pub fn len(&self) -> usize { + self.states.len() + } + + pub fn is_empty(&self) -> bool { + self.states.is_empty() + } + + pub fn get_ptr(&self, index: &[u8]) -> Option { + self.map.get(index).cloned() + } + + pub fn get_state(&self, index: usize) -> Option<&State> { + self.states.get(index) + } + + pub fn insert(&mut self, state: State, si: StatePtr) { + self.map.insert(state.clone(), si); + self.states.push(state); + } + + pub fn clear(&mut self) { + self.map.clear(); + self.states.clear(); + } + } +} + #[cfg(test)] mod tests { extern crate rand; + use std::sync::Arc; + use super::{ push_inst_ptr, read_vari32, read_varu32, write_vari32, write_varu32, State, StateFlags, }; @@ -1836,7 +1899,7 @@ mod tests { push_inst_ptr(&mut data, &mut prev, ip); } let state = State { - data: data.into_boxed_slice(), + data: Arc::from(&data[..]), }; let expected: Vec = ips.into_iter().map(|ip| ip as usize).collect(); From f7a5b10fd20a3abe353f964da53f265e7b449cd3 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Mon, 8 Oct 2018 16:19:57 +0200 Subject: [PATCH 10/10] Bump minimum rust version to 1.21 Necessary for `From<&[T]> for Arc` --- .travis.yml | 2 +- README.md | 2 +- ci/script.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index c49ba699fa..05db45d490 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ dist: trusty sudo: required language: rust rust: - - 1.20.0 + - 1.21.0 - stable - beta - nightly diff --git a/README.md b/README.md index 82420bd377..11c1d8c50f 100644 --- a/README.md +++ b/README.md @@ -210,7 +210,7 @@ recommended for general use. ### Minimum Rust version policy -This crate's minimum supported `rustc` version is `1.20.0`. +This crate's minimum supported `rustc` version is `1.21.0`. The current **tentative** policy is that the minimum Rust version required to use this crate can be increased in minor version updates. For example, if diff --git a/ci/script.sh b/ci/script.sh index eea7d39d2a..ef440336dc 100755 --- a/ci/script.sh +++ b/ci/script.sh @@ -3,7 +3,7 @@ # This is the main CI script for testing the regex crate and its sub-crates. set -ex -MSRV="1.20.0" +MSRV="1.21.0" # If we're building on 1.20, then lazy_static 1.2 will fail to build since it # updated its MSRV to 1.24.1. In this case, we force the use of lazy_static 1.1