From feaf1f67fe310ee6f290c8687cd5a6ab1e3c7b7c Mon Sep 17 00:00:00 2001 From: Lucas Biaggi Date: Mon, 4 Aug 2025 22:03:53 +0100 Subject: [PATCH] feat(fuzzy): add custom regex option This commit introduces a new custom_regex option under the fuzzy configuration. It allows users to specify a custom regular expression to be used by both implementations. --- lua/blink/cmp/config/fuzzy.lua | 3 +++ lua/blink/cmp/fuzzy/init.lua | 6 +++++- lua/blink/cmp/fuzzy/lua/init.lua | 14 +++++++++----- lua/blink/cmp/fuzzy/rust/lib.rs | 19 ++++++++++++++++--- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/lua/blink/cmp/config/fuzzy.lua b/lua/blink/cmp/config/fuzzy.lua index 8a9945f61..ad6e54bfc 100644 --- a/lua/blink/cmp/config/fuzzy.lua +++ b/lua/blink/cmp/config/fuzzy.lua @@ -4,6 +4,7 @@ --- @field use_frecency boolean Tracks the most recently/frequently used items and boosts the score of the item. Note, this does not apply when using the Lua implementation. --- @field use_proximity boolean Boosts the score of items matching nearby words. Note, this does not apply when using the Lua implementation. --- @field use_unsafe_no_lock boolean UNSAFE!! When enabled, disables the lock and fsync when writing to the frecency database. This should only be used on unsupported platforms (i.e. alpine termux). Note, this does not apply when using the Lua implementation. +--- @field custom_regex? string Override the default regex with a custom pattern for word matching. --- @field sorts blink.cmp.Sort[] Controls which sorts to use and in which order. 
--- @field prebuilt_binaries blink.cmp.PrebuiltBinariesConfig @@ -37,6 +38,7 @@ local fuzzy = { use_frecency = true, use_proximity = true, use_unsafe_no_lock = false, + custom_regex = nil, sorts = { 'score', 'sort_text' }, prebuilt_binaries = { download = true, @@ -65,6 +67,7 @@ function fuzzy.validate(config) use_frecency = { config.use_frecency, 'boolean' }, use_proximity = { config.use_proximity, 'boolean' }, use_unsafe_no_lock = { config.use_unsafe_no_lock, 'boolean' }, + custom_regex = { config.custom_regex, { 'string', 'nil' } }, sorts = { config.sorts, function(sorts) diff --git a/lua/blink/cmp/fuzzy/init.lua b/lua/blink/cmp/fuzzy/init.lua index fa218e278..bce72398d 100644 --- a/lua/blink/cmp/fuzzy/init.lua +++ b/lua/blink/cmp/fuzzy/init.lua @@ -20,7 +20,11 @@ end function fuzzy.init_db() if fuzzy.has_init_db then return end - fuzzy.implementation.init_db(vim.fn.stdpath('data') .. '/blink/cmp/fuzzy.db', config.use_unsafe_no_lock) + fuzzy.implementation.init_db( + vim.fn.stdpath('data') .. 
'/blink/cmp/fuzzy.db', + config.use_unsafe_no_lock, + config.fuzzy.custom_regex + ) vim.api.nvim_create_autocmd('VimLeavePre', { callback = fuzzy.implementation.destroy_db, diff --git a/lua/blink/cmp/fuzzy/lua/init.lua b/lua/blink/cmp/fuzzy/lua/init.lua index ed7957cce..1de2730f9 100644 --- a/lua/blink/cmp/fuzzy/lua/init.lua +++ b/lua/blink/cmp/fuzzy/lua/init.lua @@ -1,8 +1,12 @@ +local config_fuzzy = require('blink.cmp.config').fuzzy local match = require('blink.cmp.fuzzy.lua.match') local match_indices = require('blink.cmp.fuzzy.lua.match_indices') local get_keyword_range = require('blink.cmp.fuzzy.lua.keyword').get_keyword_range local guess_keyword_range = require('blink.cmp.fuzzy.lua.keyword').guess_keyword_range +local words_regex = vim.regex( + [[\%(-\?\d\+\%(\.\d\+\)\?\|\h\%(\w\|á\|Á\|é\|É\|í\|Í\|ó\|Ó\|ú\|Ú\)*\%(-\%(\w\|á\|Á\|é\|É\|í\|Í\|ó\|Ó\|ú\|Ú\)*\)*\)]] +) --- @type blink.cmp.FuzzyImplementation --- @diagnostic disable-next-line: missing-fields local fuzzy = { @@ -10,13 +14,13 @@ local fuzzy = { provider_items = {}, } -function fuzzy.init_db() end +function fuzzy.init_db() + if config_fuzzy.custom_regex then words_regex = vim.regex(config_fuzzy.custom_regex) end +end + function fuzzy.destroy_db() end -function fuzzy.access() end -local words_regex = vim.regex( - [[\%(-\?\d\+\%(\.\d\+\)\?\|\h\%(\w\|á\|Á\|é\|É\|í\|Í\|ó\|Ó\|ú\|Ú\)*\%(-\%(\w\|á\|Á\|é\|É\|í\|Í\|ó\|Ó\|ú\|Ú\)*\)*\)]] -) +function fuzzy.access() end --- Takes ~0.25ms for 1200 characters split over 40 lines function fuzzy.get_words(text) diff --git a/lua/blink/cmp/fuzzy/rust/lib.rs b/lua/blink/cmp/fuzzy/rust/lib.rs index 4dfad3816..f771ba82c 100644 --- a/lua/blink/cmp/fuzzy/rust/lib.rs +++ b/lua/blink/cmp/fuzzy/rust/lib.rs @@ -17,13 +17,24 @@ mod keyword; mod lsp_item; mod sort; -static REGEX: LazyLock<Regex> = - LazyLock::new(|| Regex::new(r"[\p{L}_][\p{L}0-9_\\-]{2,}").unwrap()); +static REGEX: LazyLock<RwLock<Regex>> = + LazyLock::new(|| RwLock::new(Regex::new(r"[\p{L}0-9_\\-]{2,}").unwrap())); static FRECENCY: 
LazyLock>> = LazyLock::new(|| RwLock::new(None)); static HAYSTACKS_BY_PROVIDER: LazyLock>>> = LazyLock::new(|| RwLock::new(HashMap::new())); -pub fn init_db(_: &Lua, (db_path, use_unsafe_no_lock): (String, bool)) -> LuaResult<bool> { +fn update_regex(new_pattern: &str) { + let mut regex = REGEX.write().unwrap(); + *regex = Regex::new(new_pattern).unwrap(); +} + +pub fn init_db( + _: &Lua, + (db_path, use_unsafe_no_lock, custom_regex): (String, bool, Option<String>), +) -> LuaResult<bool> { + if let Some(regex) = custom_regex { + update_regex(&regex); + } let mut frecency = FRECENCY.write().map_err(|_| Error::AcquireFrecencyLock)?; if frecency.is_some() { return Ok(false); @@ -211,6 +222,8 @@ pub fn guess_edit_range( pub fn get_words(_: &Lua, text: mlua::String) -> LuaResult<Vec<String>> { Ok(REGEX + .read() + .unwrap() .find_iter(&text.to_string_lossy()) .map(|m| m.as_str().to_string()) .filter(|s| s.len() < 512)