diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore index d71c278..f3d681d 100644 --- a/.gitignore +++ b/.gitignore @@ -75,3 +75,6 @@ tests/rustdoc-gui/src/**.lock ## Rust files main.rs Cargo.lock + +## Nix +/.direnv \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 10ab8cc..cb23e98 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fastembed" -version = "4.5.0" +version = "4.6.0" edition = "2021" description = "Rust implementation of https://github.com/qdrant/fastembed" license = "Apache-2.0" diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..baa0183 --- /dev/null +++ b/flake.lock @@ -0,0 +1,62 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "ref": "main", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1740367490, + "narHash": "sha256-WGaHVAjcrv+Cun7zPlI41SerRtfknGQap281+AakSAw=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "0196c0175e9191c474c26ab5548db27ef5d34b05", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..1689a29 --- /dev/null +++ b/flake.nix @@ -0,0 +1,20 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils?ref=main"; + }; + + outputs = inputs: + inputs.flake-utils.lib.eachDefaultSystem (system: + let + pkgs = inputs.nixpkgs.legacyPackages.${system}; + + in { + devShells.default = pkgs.mkShell { + packages = (with pkgs; [ + openssl + pkg-config + ]); + }; + }); +} diff --git a/src/image_embedding/impl.rs b/src/image_embedding/impl.rs index c526e30..ece71ba 100644 --- a/src/image_embedding/impl.rs +++ b/src/image_embedding/impl.rs @@ -3,6 +3,7 @@ use hf_hub::{ api::sync::{ApiBuilder, ApiRepo}, Cache, }; +use image::DynamicImage; use ndarray::{Array3, ArrayView3}; use ort::{ session::{builder::GraphOptimizationLevel, Session}, @@ -10,7 +11,7 @@ use ort::{ }; #[cfg(feature = "hf-hub")] use std::path::PathBuf; -use std::{path::Path, thread::available_parallelism}; +use std::{io::Cursor, path::Path, thread::available_parallelism}; use crate::{ common::normalize, models::image_embedding::models_list, Embedding, ImageEmbeddingModel, @@ -132,14 +133,12 @@ impl ImageEmbedding { .expect("Model not found.") } - /// Method to generate image embeddings for a Vec of image path - // Generic type to accept String, &str, OsString, &OsStr - pub fn embed + Send + Sync>( + /// Method to generate image embeddings for a Vec of image bytes + pub fn embed_bytes( &self, - images: Vec, + images: &[&[u8]], batch_size: Option, ) -> anyhow::Result> { - // Determine the batch size, default if not specified let batch_size = batch_size.unwrap_or(DEFAULT_BATCH_SIZE); let output = images @@ -149,72 +148,47 @@ impl ImageEmbedding { let inputs = batch .iter() .map(|img| { - let img = image::ImageReader::open(img)? + image::ImageReader::new(Cursor::new(img)) + .with_guessed_format()? .decode() - .map_err(|err| anyhow!("image decode: {}", err))?; - let pixels = self.preprocessor.transform(TransformData::Image(img))?; - match pixels { - TransformData::NdArray(array) => Ok(array), - _ => Err(anyhow!("Preprocessor configuration error!")), - } + .map_err(|err| anyhow!("image decode: {}", err)) }) - .collect::>>>()?; - - // Extract the batch size - let inputs_view: Vec> = - inputs.iter().map(|img| img.view()).collect(); - let pixel_values_array = ndarray::stack(ndarray::Axis(0), &inputs_view)?; + .collect::>()?; - let input_name = self.session.inputs[0].name.clone(); - let session_inputs = ort::inputs![ - input_name => Value::from_array(pixel_values_array)?, - ]?; + self.embed_images(inputs) + }) + .collect::>>()? + .into_iter() + .flatten() + .collect(); - let outputs = self.session.run(session_inputs)?; + Ok(output) + } - // Try to get the only output key - // If multiple, then default to few known keys `image_embeds` and `last_hidden_state` - let last_hidden_state_key = match outputs.len() { - 1 => vec![outputs.keys().next().unwrap()], - _ => vec!["image_embeds", "last_hidden_state"], - }; + /// Method to generate image embeddings for a Vec of image path + // Generic type to accept String, &str, OsString, &OsStr + pub fn embed + Send + Sync>( + &self, + images: Vec, + batch_size: Option, + ) -> anyhow::Result> { + // Determine the batch size, default if not specified + let batch_size = batch_size.unwrap_or(DEFAULT_BATCH_SIZE); - // Extract tensor and handle different dimensionalities - let output_data = last_hidden_state_key + let output = images + .par_chunks(batch_size) + .map(|batch| { + // Encode the texts in the batch + let inputs = batch .iter() - .find_map(|&key| { - outputs - .get(key) - .and_then(|v| v.try_extract_tensor::().ok()) + .map(|img| { + image::ImageReader::open(img)? + .decode() + .map_err(|err| anyhow!("image decode: {}", err)) }) - .ok_or_else(|| anyhow!("Could not extract tensor from any known output key"))?; - let shape = output_data.shape(); - - let embeddings: Vec> = match shape.len() { - 3 => { - // For 3D output [batch_size, sequence_length, hidden_size] - // Take only the first token, sequence_length[0] (CLS token), embedding - // and return [batch_size, hidden_size] - (0..shape[0]) - .map(|batch_idx| { - let cls_embedding = - output_data.slice(ndarray::s![batch_idx, 0, ..]).to_vec(); - normalize(&cls_embedding) - }) - .collect() - } - 2 => { - // For 2D output [batch_size, hidden_size] - output_data - .rows() - .into_iter() - .map(|row| normalize(row.as_slice().unwrap())) - .collect() - } - _ => return Err(anyhow!("Unexpected output tensor shape: {:?}", shape)), - }; - - Ok(embeddings) + .collect::>()?; + + self.embed_images(inputs) }) .collect::>>()? .into_iter() @@ -223,4 +197,73 @@ impl ImageEmbedding { Ok(output) } + + /// Embed DynamicImages + pub fn embed_images(&self, imgs: Vec) -> anyhow::Result> { + let inputs = imgs + .into_iter() + .map(|img| { + let pixels = self.preprocessor.transform(TransformData::Image(img))?; + match pixels { + TransformData::NdArray(array) => Ok(array), + _ => Err(anyhow!("Preprocessor configuration error!")), + } + }) + .collect::>>>()?; + + // Extract the batch size + let inputs_view: Vec> = inputs.iter().map(|img| img.view()).collect(); + let pixel_values_array = ndarray::stack(ndarray::Axis(0), &inputs_view)?; + + let input_name = self.session.inputs[0].name.clone(); + let session_inputs = ort::inputs![ + input_name => Value::from_array(pixel_values_array)?, + ]?; + + let outputs = self.session.run(session_inputs)?; + + // Try to get the only output key + // If multiple, then default to few known keys `image_embeds` and `last_hidden_state` + let last_hidden_state_key = match outputs.len() { + 1 => vec![outputs.keys().next().unwrap()], + _ => vec!["image_embeds", "last_hidden_state"], + }; + + // Extract tensor and handle different dimensionalities + let output_data = last_hidden_state_key + .iter() + .find_map(|&key| { + outputs + .get(key) + .and_then(|v| v.try_extract_tensor::().ok()) + }) + .ok_or_else(|| anyhow!("Could not extract tensor from any known output key"))?; + let shape = output_data.shape(); + + let embeddings = match shape.len() { + 3 => { + // For 3D output [batch_size, sequence_length, hidden_size] + // Take only the first token, sequence_length[0] (CLS token), embedding + // and return [batch_size, hidden_size] + (0..shape[0]) + .map(|batch_idx| { + let cls_embedding = + output_data.slice(ndarray::s![batch_idx, 0, ..]).to_vec(); + normalize(&cls_embedding) + }) + .collect() + } + 2 => { + // For 2D output [batch_size, hidden_size] + output_data + .rows() + .into_iter() + .map(|row| normalize(row.as_slice().unwrap())) + .collect() + } + _ => return Err(anyhow!("Unexpected output tensor shape: {:?}", shape)), + }; + + Ok(embeddings) + } }