From ac2d0e1b33b4674ad9b26266ef4b828d7200ec0f Mon Sep 17 00:00:00 2001 From: Jonas Berlin Date: Mon, 28 Nov 2022 20:37:22 +0200 Subject: [PATCH 1/4] impl: optimize replacen loop The previous implementation didn't bail out of the replace loop when the limit was reached until one more than the total number of 'find' operations had completed. By moving the limit check to the end of the loop body, we execute only the number of 'find' operations that is necessary, instead of one extra. This optimization only applies to 'replacen' calls with a limit not equal to '0'. That includes 'replace' but not 'replace_all'. PR #930 --- src/re_bytes.rs | 12 ++++++------ src/re_unicode.rs | 12 ++++++------ tests/replace.rs | 18 ++++++++++++++++++ 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/re_bytes.rs b/src/re_bytes.rs index d71969257b..07e9f98acc 100644 --- a/src/re_bytes.rs +++ b/src/re_bytes.rs @@ -496,12 +496,12 @@ impl Regex { let mut new = Vec::with_capacity(text.len()); let mut last_match = 0; for (i, m) in it { - if limit > 0 && i >= limit { - break; - } new.extend_from_slice(&text[last_match..m.start()]); new.extend_from_slice(&rep); last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } } new.extend_from_slice(&text[last_match..]); return Cow::Owned(new); @@ -516,14 +516,14 @@ impl Regex { let mut new = Vec::with_capacity(text.len()); let mut last_match = 0; for (i, cap) in it { - if limit > 0 && i >= limit { - break; - } // unwrap on 0 is OK because captures only reports matches let m = cap.get(0).unwrap(); new.extend_from_slice(&text[last_match..m.start()]); rep.replace_append(&cap, &mut new); last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } } new.extend_from_slice(&text[last_match..]); Cow::Owned(new) diff --git a/src/re_unicode.rs b/src/re_unicode.rs index 60d81a7d95..197510ea0d 100644 --- a/src/re_unicode.rs +++ b/src/re_unicode.rs @@ -554,12 +554,12 @@ impl Regex { let mut new = 
String::with_capacity(text.len()); let mut last_match = 0; for (i, m) in it { - if limit > 0 && i >= limit { - break; - } new.push_str(&text[last_match..m.start()]); new.push_str(&rep); last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } } new.push_str(&text[last_match..]); return Cow::Owned(new); @@ -574,14 +574,14 @@ impl Regex { let mut new = String::with_capacity(text.len()); let mut last_match = 0; for (i, cap) in it { - if limit > 0 && i >= limit { - break; - } // unwrap on 0 is OK because captures only reports matches let m = cap.get(0).unwrap(); new.push_str(&text[last_match..m.start()]); rep.replace_append(&cap, &mut new); last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } } new.push_str(&text[last_match..]); Cow::Owned(new) diff --git a/tests/replace.rs b/tests/replace.rs index 1dc6106357..d65be072ff 100644 --- a/tests/replace.rs +++ b/tests/replace.rs @@ -228,3 +228,21 @@ replace!( bytes!(&std::borrow::Cow::<'_, [u8]>::Owned(vec![b'Z'])), "age: Z6" ); + +#[test] +fn replacen_no_captures() { + let re = regex!(r"[0-9]"); + assert_eq!( + re.replacen(text!("age: 1234"), 2, t!("Z")), + text!("age: ZZ34") + ); +} + +#[test] +fn replacen_with_captures() { + let re = regex!(r"([0-9])"); + assert_eq!( + re.replacen(text!("age: 1234"), 2, t!("${1}Z")), + text!("age: 1Z2Z34") + ); +} From 9330ea50f5611cbf75b86f03278ffb1db5542a42 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 8 Dec 2022 19:40:24 +0200 Subject: [PATCH 2/4] ci: harden configuration This makes it so the permissions are locked down by default. The threat model here is something like, "what happens if an authorized party gains control of the non-PR CI configuration somehow." To be honest, I (BurntSushi) don't quite understand how that might happen without also the ability to set the permissions itself. But locking permissions down by default does seem like a good and sensible thing to do. 
Closes #932 --- .github/workflows/ci.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ac2ace4753..7c04a746e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,27 @@ on: - master schedule: - cron: '00 01 * * *' + +# The section is needed to drop write-all permissions that are granted on +# `schedule` event. By specifying any permission explicitly all others are set +# to none. By using the principle of least privilege the damage a compromised +# workflow can do (because of an injection or compromised third party tool or +# action) is restricted. Currently the workflow doesn't need any additional +# permission except for pulling the code. Adding labels to issues, commenting +# on pull-requests, etc. may need additional permissions: +# +# Syntax for this section: +# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#permissions +# +# Reference for how to assign permissions on a job-by-job basis: +# https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs +# +# Reference for available permissions that we can enable if needed: +# https://docs.github.com/en/actions/security-guides/automatic-token-authentication#permissions-for-the-github_token +permissions: + # to fetch code (actions/checkout) + contents: read + jobs: test: name: test From 98c1b63ffe1d4d0c385137cf878e0e959454ce3d Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Mon, 9 Jan 2023 08:19:34 -0500 Subject: [PATCH 3/4] changelog: 1.7.1 --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 73e9e66599..466f5a9c92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,19 @@ +1.7.1 (2023-01-09) +================== +This release was done principally to try and fix the docs.rs rendering for the +regex crate.
+ +Performance improvements: + +* [PERF #930](https://github.com/rust-lang/regex/pull/930): + Optimize `replacen`. This also applies to `replace`, but not `replace_all`. + +Bug fixes: + +* [BUG #945](https://github.com/rust-lang/regex/issues/945): + Maybe fix rustdoc rendering by just bumping a new release? + + 1.7.0 (2022-11-05) ================== This release principally includes an upgrade to Unicode 15. From a9b2e02352db92ce1f6e5b7ecd41b8bbffbe161a Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Mon, 9 Jan 2023 08:19:50 -0500 Subject: [PATCH 4/4] 1.7.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index cafb65e70c..1dc7f5685c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex" -version = "1.7.0" #:version +version = "1.7.1" #:version authors = ["The Rust Project Developers"] license = "MIT OR Apache-2.0" readme = "README.md"