diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a7a01c9..7f7e64c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,13 +1,11 @@ name: "Cargo Tests" on: pull_request: - schedule: - - cron: 0 0 * * * env: CARGO_TERM_COLOR: always RUSTFLAGS: "-Dwarnings" - ONNX_VERSION: v1.22.0 + ONNX_VERSION: v1.22.1 jobs: test: diff --git a/Cargo.toml b/Cargo.toml index d3ee71a..6d91e01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fastembed" -version = "5.0.0" +version = "5.0.1" edition = "2021" description = "Library for generating vector embeddings, reranking locally." license = "Apache-2.0" diff --git a/README.md b/README.md index b65e586..b3509ec 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,10 @@ - [**jinaai/jina-reranker-v1-turbo-en**](https://huggingface.co/jinaai/jina-reranker-v1-turbo-en) - [**jinaai/jina-reranker-v2-base-multiligual**](https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual) +## ✊ Support + +To support the library, please donate to our primary upstream dependency, [`ort`](https://github.com/pykeio/ort?tab=readme-ov-file#-sponsor-ort) - The Rust wrapper for the ONNX runtime. + ## 🚀 Installation Run the following in your project directory: @@ -73,7 +77,7 @@ Or add the following line to your Cargo.toml: ```toml [dependencies] -fastembed = "4" +fastembed = "5" ``` ## 📖 Usage @@ -180,10 +184,6 @@ println!("Rerank result: {:?}", results); Alternatively, local model files can be used for inference via the `try_new_from_user_defined(...)` methods of respective structs. -## ✊ Support - -To support the library, please donate to our primary upstream dependency, [`ort`](https://github.com/pykeio/ort?tab=readme-ov-file#-sponsor-ort) - The Rust wrapper for the ONNX runtime. 
- ## 📄 LICENSE -Apache 2.0 © [2024](https://github.com/Anush008/fastembed-rs/blob/main/LICENSE) +[Apache 2.0](https://github.com/Anush008/fastembed-rs/blob/main/LICENSE) diff --git a/src/output/embedding_output.rs b/src/output/embedding_output.rs index 384d918..e94316b 100644 --- a/src/output/embedding_output.rs +++ b/src/output/embedding_output.rs @@ -10,7 +10,7 @@ use super::{OutputKey, OutputPrecedence}; /// pooling etc. This struct should contain all the necessary information for the /// post-processing to be performed. pub struct SingleBatchOutput { - pub outputs: std::collections::HashMap<String, ort::value::Value>, + pub outputs: std::collections::BTreeMap<String, ort::value::Value>, pub attention_mask_array: Array2<i64>, } @@ -26,7 +26,14 @@ impl SingleBatchOutput { let ort_output: &ort::value::Value = precedence .key_precedence() .find_map(|key| match key { - OutputKey::OnlyOne => self.outputs.values().next(), + // Only select the sole output if and only if there is exactly one. + OutputKey::OnlyOne => { + if self.outputs.len() == 1 { + self.outputs.values().next() + } else { + None + } + } OutputKey::ByOrder(idx) => self.outputs.values().nth(*idx), OutputKey::ByName(name) => self.outputs.get(*name), }) diff --git a/tests/embeddings.rs b/tests/embeddings.rs index 900b57e..70e7772 100644 --- a/tests/embeddings.rs +++ b/tests/embeddings.rs @@ -659,3 +659,24 @@ fn test_allminilml6v2_match_python_counterpart() { assert!((expected - actual).abs() < tolerance); } } + +// Ref: https://github.com/Anush008/fastembed-rs/issues/171#issue-3209484009 +#[test] +fn clip_vit_b32_deterministic_across_calls() { + let q = "red car"; + let mut fe = TextEmbedding::try_new(InitOptions::new(EmbeddingModel::ClipVitB32)).unwrap(); + let mut first: Option<Vec<f32>> = None; + for i in 0..100 { + let vecs = fe.embed(vec![q], None).unwrap(); + if first.is_none() { + first = Some(vecs[0].clone()); + } else { + assert_eq!( + vecs[0], + *first.as_ref().unwrap(), + "Embedding changed after {} iterations", + i + ); + } + } +}