Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
teddy: port teddy searcher to std::arch
This commit ports the Teddy searcher to use std::arch and moves off the
portable SIMD vector API. Performance remains the same, and it looks
like the codegen is identical, which is great!

This also makes the `simd-accel` feature a no-op and adds a new
`unstable` feature which will enable the Teddy optimization. The `-C
target-feature` or `-C target-cpu` settings are no longer necessary,
since this will now do runtime target feature detection.

We also add a new `unstable` feature to the regex crate, which will
enable this new use of std::arch. Once enabled, the Teddy optimization
becomes available automatically without any additional compile-time
flags.
  • Loading branch information
BurntSushi committed Mar 13, 2018
commit 9ddc0b5b0a36e24e308490383b894a7f8452ebd9
16 changes: 12 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ memchr = "2.0.0"
thread_local = "0.3.2"
# For parsing regular expressions.
regex-syntax = { path = "regex-syntax", version = "0.5.1" }
# For accelerating text search.
simd = { version = "0.2.1", optional = true }
# For compiling UTF-8 decoding into automata.
utf8-ranges = "1.0.0"

Expand All @@ -45,10 +43,20 @@ quickcheck = { version = "0.6", default-features = false }
rand = "0.4"

[features]
# Enable to use the unstable pattern traits defined in std.
# We don't enable any features by default currently, but if the compiler
# supports a specific type of feature, then regex's build.rs might enable
# some default features.
default = []
# A blanket feature that governs whether unstable features are enabled or not.
# Unstable features are disabled by default, and typically rely on unstable
# features in rustc itself.
unstable = ["pattern"]
# Enable to use the unstable pattern traits defined in std. This is enabled
# by default if the unstable feature is enabled.
pattern = []
# Enable to use simd acceleration.
simd-accel = ["simd"]
# Note that this is deprecated and is a no-op.
simd-accel = []

[lib]
# There are no benchmarks in the library code itself
Expand Down
2 changes: 1 addition & 1 deletion bench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ libc = "0.2"
onig = { version = "3", optional = true }
libpcre-sys = { version = "0.2", optional = true }
memmap = "0.6"
regex = { version = "0.2.0", path = "..", features = ["simd-accel"] }
regex = { version = "0.2.0", path = "..", features = ["unstable"] }
regex-syntax = { version = "0.5.0", path = "../regex-syntax" }
serde = "1"
serde_derive = "1"
Expand Down
26 changes: 26 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use std::env;
use std::ffi::OsString;
use std::process::Command;

/// Build script entry point.
///
/// Runs `rustc --version` and, when the reported version string contains
/// `nightly`, emits the `regex_runtime_teddy_ssse3` cfg so the crate can
/// compile in its vectorized Teddy searcher.
///
/// # Panics
///
/// Panics if the compiler cannot be executed or if its version output is
/// not valid UTF-8.
fn main() {
    // Use the compiler named by the `RUSTC` environment variable when it is
    // set; otherwise fall back to plain `rustc`. The fallback is built lazily
    // so nothing is allocated when `RUSTC` is present.
    let rustc = env::var_os("RUSTC").unwrap_or_else(|| OsString::from("rustc"));
    let output = Command::new(&rustc)
        .arg("--version")
        .output()
        .expect("failed to execute `rustc --version`")
        .stdout;
    let version = String::from_utf8(output)
        .expect("`rustc --version` output was not valid UTF-8");

    // If we're using nightly Rust, then we can enable vector optimizations.
    // Note that these aren't actually activated unless the `unstable` feature
    // is enabled.
    //
    // We also don't activate these if we've explicitly disabled auto
    // optimizations. Disabling auto optimizations is intended for use in
    // tests, so that we can reliably test fallback implementations.
    let auto_optimizations_allowed =
        env::var_os("CARGO_CFG_REGEX_DISABLE_AUTO_OPTIMIZATIONS").is_none();
    if auto_optimizations_allowed && version.contains("nightly") {
        println!("cargo:rustc-cfg=regex_runtime_teddy_ssse3");
    }
}
2 changes: 1 addition & 1 deletion src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use compile::Compiler;
use dfa;
use error::Error;
use input::{ByteInput, CharInput};
use literals::LiteralSearcher;
use literal::LiteralSearcher;
use pikevm;
use prog::Program;
use re_builder::RegexOptions;
Expand Down
2 changes: 1 addition & 1 deletion src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::u32;

use syntax;

use literals::LiteralSearcher;
use literal::LiteralSearcher;
use prog::InstEmptyLook;
use utf8::{decode_utf8, decode_last_utf8};

Expand Down
18 changes: 8 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -520,14 +520,15 @@ another matching engine with fixed memory requirements.
#![deny(missing_docs)]
#![cfg_attr(test, deny(warnings))]
#![cfg_attr(feature = "pattern", feature(pattern))]
#![cfg_attr(feature = "simd-accel", feature(cfg_target_feature))]
#![cfg_attr(feature = "unstable", feature(target_feature, stdsimd))]

extern crate aho_corasick;
extern crate memchr;
extern crate thread_local;
#[macro_use] #[cfg(test)] extern crate quickcheck;
#[cfg(test)]
#[macro_use]
extern crate quickcheck;
extern crate regex_syntax as syntax;
#[cfg(feature = "simd-accel")] extern crate simd;
extern crate utf8_ranges;

pub use error::Error;
Expand Down Expand Up @@ -645,7 +646,7 @@ mod exec;
mod expand;
mod freqs;
mod input;
mod literals;
mod literal;
#[cfg(feature = "pattern")]
mod pattern;
mod pikevm;
Expand All @@ -655,12 +656,9 @@ mod re_bytes;
mod re_set;
mod re_trait;
mod re_unicode;
#[cfg(feature = "simd-accel")]
mod simd_accel;
#[cfg(not(feature = "simd-accel"))]
#[path = "simd_fallback/mod.rs"]
mod simd_accel;
mod sparse;
#[cfg(feature = "unstable")]
mod vector;

/// The `internal` module exists to support suspicious activity, such as
/// testing different matching engines and supporting the `regex-debug` CLI
Expand All @@ -670,6 +668,6 @@ pub mod internal {
pub use compile::Compiler;
pub use exec::{Exec, ExecBuilder};
pub use input::{Char, Input, CharInput, InputAt};
pub use literals::LiteralSearcher;
pub use literal::LiteralSearcher;
pub use prog::{Program, Inst, EmptyLook, InstRanges};
}
5 changes: 3 additions & 2 deletions src/literals.rs → src/literal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ use memchr::{memchr, memchr2, memchr3};
use syntax::hir::literal::{Literal, Literals};

use freqs::BYTE_FREQUENCIES;
use self::teddy_ssse3::Teddy;

use simd_accel::teddy128::{Teddy, is_teddy_128_available};
mod teddy_ssse3;

/// A prefix extracted from a compiled regular expression.
///
Expand Down Expand Up @@ -219,7 +220,7 @@ impl Matcher {
}
}
let is_aho_corasick_fast = sset.dense.len() == 1 && sset.all_ascii;
if is_teddy_128_available() && !is_aho_corasick_fast {
if Teddy::available() && !is_aho_corasick_fast {
// Only try Teddy if Aho-Corasick can't use memchr on an ASCII
// byte. Also, in its current form, Teddy doesn't scale well to
// lots of literals.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
use syntax::hir::literal::Literals;

pub fn is_teddy_128_available() -> bool {
false
}

/// Stub `Teddy` searcher that carries no state (it wraps the unit type).
///
/// NOTE(review): this appears to be the fallback used when the vectorized
/// implementation is not compiled in — confirm against the module layout.
#[derive(Debug, Clone)]
pub struct Teddy(());

Expand All @@ -15,6 +11,7 @@ pub struct Match {
}

impl Teddy {
pub fn available() -> bool { false }
pub fn new(_pats: &Literals) -> Option<Teddy> { None }
pub fn patterns(&self) -> &[Vec<u8>] { &[] }
pub fn len(&self) -> usize { 0 }
Expand Down
Loading