diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f5d88aa..89d8d9b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,17 +1,13 @@
 name: "Cargo Tests"
 on:
-    pull_request:
-      types:
-        - opened
-        - edited
-        - synchronize
-        - reopened
-    schedule:
-      - cron: 0 0 * * *
-  
+  pull_request:
+  schedule:
+    - cron: 0 0 * * *
+
 env:
   CARGO_TERM_COLOR: always
   RUSTFLAGS: "-Dwarnings"
+  ONNX_VERSION: v1.20.1
 
 jobs:
   test:
@@ -20,14 +16,40 @@ jobs:
     steps:
       - uses: actions/checkout@v3
 
+      - name: Restore Builds
+        id: cache-build-restore
+        uses: actions/cache/restore@v4
+        with:
+          key: '${{ runner.os }}-onnxruntime-${{ env.ONNX_VERSION }}'
+          path: |
+            onnxruntime/build/Linux/Release/
+
+      - name: Compile ONNX Runtime for Linux
+        if: steps.cache-build-restore.outputs.cache-hit != 'true'
+        run: |
+          echo Cloning ONNX Runtime repository...
+          git clone https://github.com/microsoft/onnxruntime --recursive --branch $ONNX_VERSION --single-branch --depth 1
+          cd onnxruntime
+          ./build.sh --update --build --config Release --parallel --compile_no_warning_as_error --skip_submodule_sync
+          cd ..
+
       - name: Cargo Test With Release Build
-        run: cargo test --release
+        run: ORT_LIB_LOCATION="$(pwd)/onnxruntime/build/Linux/Release" cargo test --release --no-default-features --features online
 
       - name: Cargo Test Offline
-        run: cargo test --no-default-features --features ort-download-binaries
+        run: ORT_LIB_LOCATION="$(pwd)/onnxruntime/build/Linux/Release" cargo test --no-default-features
 
       - name: Cargo Clippy
         run: cargo clippy
 
       - name: Cargo FMT
         run: cargo fmt --all -- --check
+
+      - name: Always Save Cache
+        id: cache-build-save
+        if: always() && steps.cache-build-restore.outputs.cache-hit != 'true'
+        uses: actions/cache/save@v4
+        with:
+          key: '${{ steps.cache-build-restore.outputs.cache-primary-key }}'
+          path: |
+            onnxruntime/build/Linux/Release/
diff --git a/Cargo.toml b/Cargo.toml
index 0d59b51..dc80b24 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "fastembed"
-version = "4.1.0"
+version = "4.3.0"
 edition = "2021"
 description = "Rust implementation of https://github.com/qdrant/fastembed"
 license = "Apache-2.0"
@@ -31,7 +31,7 @@ ort = { git = "https://github.com/pykeio/ort", rev = "2a9f66d", default-features
 ] }
 rayon = { version = "1.10", default-features = false }
 serde_json = { version = "1" }
-tokenizers = { version = "0.19", default-features = false, features = ["onig"] }
+tokenizers = { version = "0.21", default-features = false, features = ["onig"] }
 
 [features]
 default = ["ort-download-binaries", "online"]
diff --git a/README.md b/README.md
index 96f6c06..790363b 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ The default model is Flag Embedding, which is top of the [MTEB](https://huggingf
 - [**BAAI/bge-small-en-v1.5**](https://huggingface.co/BAAI/bge-small-en-v1.5) - Default
 - [**sentence-transformers/all-MiniLM-L6-v2**](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)
 - [**mixedbread-ai/mxbai-embed-large-v1**](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1)
+- [**Qdrant/clip-ViT-B-32-text**](https://huggingface.co/Qdrant/clip-ViT-B-32-text) - pairs with the image model clip-ViT-B-32-vision for image-to-text search
   
 <details>
   <summary>Click to see full List</summary>
@@ -39,7 +40,7 @@ The default model is Flag Embedding, which is top of the [MTEB](https://huggingf
 - [**sentence-transformers/paraphrase-MiniLM-L12-v2**](https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L12-v2)
 - [**sentence-transformers/paraphrase-multilingual-mpnet-base-v2**](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2)
 - [**nomic-ai/nomic-embed-text-v1**](https://huggingface.co/nomic-ai/nomic-embed-text-v1)
-- [**nomic-ai/nomic-embed-text-v1.5**](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5)
+- [**nomic-ai/nomic-embed-text-v1.5**](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) - pairs with the image model nomic-embed-vision-v1.5 for image-to-text search
 - [**intfloat/multilingual-e5-small**](https://huggingface.co/intfloat/multilingual-e5-small)
 - [**intfloat/multilingual-e5-base**](https://huggingface.co/intfloat/multilingual-e5-base)
 - [**intfloat/multilingual-e5-large**](https://huggingface.co/intfloat/multilingual-e5-large)
@@ -58,6 +59,7 @@ The default model is Flag Embedding, which is top of the [MTEB](https://huggingf
 - [**Qdrant/resnet50-onnx**](https://huggingface.co/Qdrant/resnet50-onnx)
 - [**Qdrant/Unicom-ViT-B-16**](https://huggingface.co/Qdrant/Unicom-ViT-B-16)
 - [**Qdrant/Unicom-ViT-B-32**](https://huggingface.co/Qdrant/Unicom-ViT-B-32)
+- [**nomic-ai/nomic-embed-vision-v1.5**](https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5)
 
 ### Reranking
 
@@ -158,23 +160,17 @@ println!("Rerank result: {:?}", results);
 
 Alternatively, local model files can be used for inference via the `try_new_from_user_defined(...)` methods of respective structs.
 
-## 🚒 Under the hood
+## ✊ Support
 
-### Why fast?
+To support the library, please consider donating to our primary upstream dependency, [`ort`](https://github.com/pykeio/ort?tab=readme-ov-file#-sponsor-ort) - The Rust wrapper for the ONNX runtime.
 
-It's important we justify the "fast" in FastEmbed. FastEmbed is fast because:
+## ⚙️ Under the hood
 
-1. Quantized model weights
-2. ONNX Runtime which allows for inference on CPU, GPU, and other dedicated runtimes
+It's important we justify the "fast" in FastEmbed. FastEmbed is fast because of:
 
-### Why light?
-
-1. No hidden dependencies via Huggingface Transformers
-
-### Why accurate?
-
-1. Better than OpenAI Ada-002
-2. Top of the Embedding leaderboards e.g. [MTEB](https://huggingface.co/spaces/mteb/leaderboard)
+1. Quantized model weights.
+2. ONNX Runtime which allows for inference on CPU, GPU, and other dedicated runtimes.
+3. No hidden dependencies via Huggingface Transformers.
 
 ## 📄 LICENSE
 
diff --git a/src/image_embedding/impl.rs b/src/image_embedding/impl.rs
index 230ce49..fe6f979 100644
--- a/src/image_embedding/impl.rs
+++ b/src/image_embedding/impl.rs
@@ -4,7 +4,10 @@ use hf_hub::{
     Cache,
 };
 use ndarray::{Array3, ArrayView3};
-use ort::{GraphOptimizationLevel, Session, Value};
+use ort::{
+    session::{builder::GraphOptimizationLevel, Session},
+    value::Value,
+};
 #[cfg(feature = "online")]
 use std::path::PathBuf;
 use std::{path::Path, thread::available_parallelism};
@@ -14,6 +17,8 @@ use crate::{
     ModelInfo,
 };
 use anyhow::anyhow;
+#[cfg(feature = "online")]
+use anyhow::Context;
 
 #[cfg(feature = "online")]
 use super::ImageInitOptions;
@@ -49,13 +54,13 @@ impl ImageEmbedding {
 
         let preprocessor_file = model_repo
             .get("preprocessor_config.json")
-            .unwrap_or_else(|_| panic!("Failed to retrieve preprocessor_config.json"));
+            .context("Failed to retrieve preprocessor_config.json")?;
         let preprocessor = Compose::from_file(preprocessor_file)?;
 
         let model_file_name = ImageEmbedding::get_model_info(&model_name).model_file;
         let model_file_reference = model_repo
             .get(&model_file_name)
-            .unwrap_or_else(|_| panic!("Failed to retrieve {} ", model_file_name));
+            .context(format!("Failed to retrieve {}", model_file_name))?;
 
         let session = Session::builder()?
             .with_execution_providers(execution_providers)?
@@ -108,8 +113,7 @@ impl ImageEmbedding {
         let cache = Cache::new(cache_dir);
         let api = ApiBuilder::from_cache(cache)
             .with_progress(show_download_progress)
-            .build()
-            .unwrap();
+            .build()?;
 
         let repo = api.model(model.to_string());
         Ok(repo)
@@ -169,24 +173,52 @@ impl ImageEmbedding {
                 let outputs = self.session.run(session_inputs)?;
 
                 // Try to get the only output key
-                // If multiple, then default to `image_embeds`
+                // If multiple, then default to few known keys `image_embeds` and `last_hidden_state`
                 let last_hidden_state_key = match outputs.len() {
-                    1 => outputs.keys().next().unwrap(),
-                    _ => "image_embeds",
+                    1 => vec![outputs.keys().next().unwrap()],
+                    _ => vec!["image_embeds", "last_hidden_state"],
                 };
 
-                // Extract and normalize embeddings
-                let output_data = outputs[last_hidden_state_key].try_extract_tensor::<f32>()?;
-
-                let embeddings: Vec<Vec<f32>> = output_data
-                    .rows()
-                    .into_iter()
-                    .map(|row| normalize(row.as_slice().unwrap()))
-                    .collect();
+                // Extract tensor and handle different dimensionalities
+                let output_data = last_hidden_state_key
+                    .iter()
+                    .find_map(|&key| {
+                        outputs
+                            .get(key)
+                            .and_then(|v| v.try_extract_tensor::<f32>().ok())
+                    })
+                    .ok_or_else(|| anyhow!("Could not extract tensor from any known output key"))?;
+                let shape = output_data.shape();
+
+                let embeddings: Vec<Vec<f32>> = match shape.len() {
+                    3 => {
+                        // For 3D output [batch_size, sequence_length, hidden_size]
+                        // Take only the first token, sequence_length[0] (CLS token), embedding
+                        // and return [batch_size, hidden_size]
+                        (0..shape[0])
+                            .map(|batch_idx| {
+                                let cls_embedding =
+                                    output_data.slice(ndarray::s![batch_idx, 0, ..]).to_vec();
+                                normalize(&cls_embedding)
+                            })
+                            .collect()
+                    }
+                    2 => {
+                        // For 2D output [batch_size, hidden_size]
+                        output_data
+                            .rows()
+                            .into_iter()
+                            .map(|row| normalize(row.as_slice().unwrap()))
+                            .collect()
+                    }
+                    _ => return Err(anyhow!("Unexpected output tensor shape: {:?}", shape)),
+                };
 
                 Ok(embeddings)
             })
-            .flat_map(|result: Result<Vec<Vec<f32>>, anyhow::Error>| result.unwrap())
+            .collect::<anyhow::Result<Vec<_>>>()?
+            .into_iter()
+            .flatten()
             .collect();
 
         Ok(output)
diff --git a/src/image_embedding/init.rs b/src/image_embedding/init.rs
index 85e9739..00818cf 100644
--- a/src/image_embedding/init.rs
+++ b/src/image_embedding/init.rs
@@ -1,6 +1,6 @@
 use std::path::{Path, PathBuf};
 
-use ort::{ExecutionProviderDispatch, Session};
+use ort::{execution_providers::ExecutionProviderDispatch, session::Session};
 
 use crate::{ImageEmbeddingModel, DEFAULT_CACHE_DIR};
 
diff --git a/src/lib.rs b/src/lib.rs
index 3bfd651..e8f316a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -62,7 +62,7 @@ mod reranking;
 mod sparse_text_embedding;
 mod text_embedding;
 
-pub use ort::ExecutionProviderDispatch;
+pub use ort::execution_providers::ExecutionProviderDispatch;
 
 pub use crate::common::{
     read_file_to_bytes, Embedding, Error, SparseEmbedding, TokenizerFiles, DEFAULT_CACHE_DIR,
diff --git a/src/models/image_embedding.rs b/src/models/image_embedding.rs
index a13cfdd..90f1410 100644
--- a/src/models/image_embedding.rs
+++ b/src/models/image_embedding.rs
@@ -12,6 +12,8 @@ pub enum ImageEmbeddingModel {
     UnicomVitB16,
     /// Qdrant/Unicom-ViT-B-32
     UnicomVitB32,
+    /// nomic-ai/nomic-embed-vision-v1.5
+    NomicEmbedVisionV15,
 }
 
 pub fn models_list() -> Vec<ModelInfo<ImageEmbeddingModel>> {
@@ -43,7 +45,14 @@ pub fn models_list() -> Vec<ModelInfo<ImageEmbeddingModel>> {
             description: String::from("Unicom Unicom-ViT-B-32 from open-metric-learning"),
             model_code: String::from("Qdrant/Unicom-ViT-B-32"),
             model_file: String::from("model.onnx"),
-        }
+        },
+        ModelInfo {
+            model: ImageEmbeddingModel::NomicEmbedVisionV15,
+            dim: 768,
+            description: String::from("Nomic NomicEmbedVisionV15"),
+            model_code: String::from("nomic-ai/nomic-embed-vision-v1.5"),
+            model_file: String::from("onnx/model.onnx"),
+        },
     ];
 
     // TODO: Use when out in stable
diff --git a/src/models/text_embedding.rs b/src/models/text_embedding.rs
index 549e21e..cddd979 100644
--- a/src/models/text_embedding.rs
+++ b/src/models/text_embedding.rs
@@ -63,6 +63,8 @@ pub enum EmbeddingModel {
     GTELargeENV15,
     /// Quantized Alibaba-NLP/gte-large-en-v1.5
     GTELargeENV15Q,
+    /// Qdrant/clip-ViT-B-32-text
+    ClipVitB32,
 }
 
 /// Centralized function to initialize the models map.
@@ -256,6 +258,13 @@ fn init_models_map() -> HashMap<EmbeddingModel, ModelInfo<EmbeddingModel>> {
             model_code: String::from("Alibaba-NLP/gte-large-en-v1.5"),
             model_file: String::from("onnx/model_quantized.onnx"),
         },
+        ModelInfo {
+            model: EmbeddingModel::ClipVitB32,
+            dim: 512,
+            description: String::from("CLIP text encoder based on ViT-B/32"),
+            model_code: String::from("Qdrant/clip-ViT-B-32-text"),
+            model_file: String::from("model.onnx"),
+        },
     ];
 
     // TODO: Use when out in stable
@@ -327,6 +336,8 @@ impl EmbeddingModel {
             EmbeddingModel::GTEBaseENV15Q => Some(Pooling::Cls),
             EmbeddingModel::GTELargeENV15 => Some(Pooling::Cls),
             EmbeddingModel::GTELargeENV15Q => Some(Pooling::Cls),
+
+            EmbeddingModel::ClipVitB32 => Some(Pooling::Mean),
         }
     }
 
diff --git a/src/output/embedding_output.rs b/src/output/embedding_output.rs
index d6ca543..69bfb92 100644
--- a/src/output/embedding_output.rs
+++ b/src/output/embedding_output.rs
@@ -1,5 +1,5 @@
 use ndarray::{Array2, ArrayView, Dim, IxDynImpl};
-use ort::Value;
+use ort::session::SessionOutputs;
 
 use crate::pooling;
 
@@ -11,7 +11,7 @@ use super::{OutputKey, OutputPrecedence};
 /// pooling etc. This struct should contain all the necessary information for the
 /// post-processing to be performed.
 pub struct SingleBatchOutput<'r, 's> {
-    pub session_outputs: ort::SessionOutputs<'r, 's>,
+    pub session_outputs: SessionOutputs<'r, 's>,
     pub attention_mask_array: Array2<i64>,
 }
 
@@ -23,19 +23,13 @@ impl SingleBatchOutput<'_, '_> {
     pub fn select_output<'a>(
         &'a self,
         precedence: &impl OutputPrecedence,
-    ) -> anyhow::Result<ArrayView<'a, f32, Dim<IxDynImpl>>> {
-        let ort_output: &Value = precedence
+    ) -> anyhow::Result<ArrayView<f32, Dim<IxDynImpl>>> {
+        let ort_output: &ort::value::Value = precedence
             .key_precedence()
             .find_map(|key| match key {
-                OutputKey::OnlyOne => {
-                    // Only export the value if there is only one output available.
-                    if self.session_outputs.len() == 1 {
-                        let key = self.session_outputs.keys().next().unwrap();
-                        self.session_outputs.get(key)
-                    } else {
-                        None
-                    }
-                }
+                OutputKey::OnlyOne => self
+                    .session_outputs
+                    .get(self.session_outputs.keys().nth(0)?),
                 OutputKey::ByOrder(idx) => {
                     let x = self
                         .session_outputs
diff --git a/src/reranking/impl.rs b/src/reranking/impl.rs
index d503b17..cc458fb 100644
--- a/src/reranking/impl.rs
+++ b/src/reranking/impl.rs
@@ -1,4 +1,10 @@
+#[cfg(feature = "online")]
+use anyhow::Context;
 use anyhow::Result;
+use ort::{
+    session::{builder::GraphOptimizationLevel, Session},
+    value::Value,
+};
 use std::thread::available_parallelism;
 
 #[cfg(feature = "online")]
@@ -10,7 +16,6 @@ use crate::{
 #[cfg(feature = "online")]
 use hf_hub::{api::sync::ApiBuilder, Cache};
 use ndarray::{s, Array};
-use ort::{GraphOptimizationLevel, Session, Value};
 use rayon::{iter::ParallelIterator, slice::ParallelSlice};
 use tokenizers::Tokenizer;
 
@@ -67,15 +72,16 @@ impl TextRerank {
         let model_repo = api.model(model_name.to_string());
 
         let model_file_name = TextRerank::get_model_info(&model_name).model_file;
-        let model_file_reference = model_repo
-            .get(&model_file_name)
-            .unwrap_or_else(|_| panic!("Failed to retrieve model file: {}", model_file_name));
+        let model_file_reference = model_repo.get(&model_file_name).context(format!(
+            "Failed to retrieve model file: {}",
+            model_file_name
+        ))?;
         let additional_files = TextRerank::get_model_info(&model_name).additional_files;
         for additional_file in additional_files {
-            let _additional_file_reference =
-                model_repo.get(&additional_file).unwrap_or_else(|_| {
-                    panic!("Failed to retrieve additional file: {}", additional_file)
-                });
+            let _additional_file_reference = model_repo.get(&additional_file).context(format!(
+                "Failed to retrieve additional file: {}",
+                additional_file
+            ))?;
         }
 
         let session = Session::builder()?
@@ -193,7 +199,9 @@ impl TextRerank {
 
                 Ok(scores)
             })
-            .flat_map(|result: Result<Vec<f32>, anyhow::Error>| result.unwrap())
+            .collect::<Result<Vec<_>>>()?
+            .into_iter()
+            .flatten()
             .collect();
 
         // Return top_n_result of type Vec<RerankResult> ordered by score in descending order, don't use binary heap
diff --git a/src/reranking/init.rs b/src/reranking/init.rs
index 1a8a555..3ca3d7c 100644
--- a/src/reranking/init.rs
+++ b/src/reranking/init.rs
@@ -1,6 +1,6 @@
 use std::path::{Path, PathBuf};
 
-use ort::{ExecutionProviderDispatch, Session};
+use ort::{execution_providers::ExecutionProviderDispatch, session::Session};
 use tokenizers::Tokenizer;
 
 use crate::{RerankerModel, TokenizerFiles, DEFAULT_CACHE_DIR};
diff --git a/src/sparse_text_embedding/impl.rs b/src/sparse_text_embedding/impl.rs
index 0440810..2bd4256 100644
--- a/src/sparse_text_embedding/impl.rs
+++ b/src/sparse_text_embedding/impl.rs
@@ -4,6 +4,8 @@ use crate::{
     models::sparse::{models_list, SparseModel},
     ModelInfo, SparseEmbedding,
 };
+#[cfg(feature = "online")]
+use anyhow::Context;
 use anyhow::Result;
 #[cfg(feature = "online")]
 use hf_hub::{
@@ -11,9 +13,8 @@ use hf_hub::{
     Cache,
 };
 use ndarray::{Array, CowArray};
+use ort::{session::Session, value::Value};
 #[cfg_attr(not(feature = "online"), allow(unused_imports))]
-use ort::GraphOptimizationLevel;
-use ort::{Session, Value};
 use rayon::{iter::ParallelIterator, slice::ParallelSlice};
 #[cfg(feature = "online")]
 use std::path::PathBuf;
@@ -35,6 +36,7 @@ impl SparseTextEmbedding {
     #[cfg(feature = "online")]
     pub fn try_new(options: SparseInitOptions) -> Result<Self> {
         use super::SparseInitOptions;
+        use ort::{session::builder::GraphOptimizationLevel, session::Session};
 
         let SparseInitOptions {
             model_name,
@@ -55,7 +57,7 @@ impl SparseTextEmbedding {
         let model_file_name = SparseTextEmbedding::get_model_info(&model_name).model_file;
         let model_file_reference = model_repo
             .get(&model_file_name)
-            .unwrap_or_else(|_| panic!("Failed to retrieve {} ", model_file_name));
+            .context(format!("Failed to retrieve {} ", model_file_name))?;
 
         let session = Session::builder()?
             .with_execution_providers(execution_providers)?
@@ -91,8 +93,7 @@ impl SparseTextEmbedding {
         let cache = Cache::new(cache_dir);
         let api = ApiBuilder::from_cache(cache)
             .with_progress(show_download_progress)
-            .build()
-            .unwrap();
+            .build()?;
 
         let repo = api.model(model.to_string());
         Ok(repo)
@@ -189,7 +190,9 @@ impl SparseTextEmbedding {
 
                 Ok(embeddings)
             })
-            .flat_map(|result: Result<Vec<SparseEmbedding>, anyhow::Error>| result.unwrap())
+            .collect::<Result<Vec<_>>>()?
+            .into_iter()
+            .flatten()
             .collect();
 
         Ok(output)
diff --git a/src/sparse_text_embedding/init.rs b/src/sparse_text_embedding/init.rs
index b81dfe8..3ac2348 100644
--- a/src/sparse_text_embedding/init.rs
+++ b/src/sparse_text_embedding/init.rs
@@ -1,6 +1,6 @@
 use std::path::{Path, PathBuf};
 
-use ort::{ExecutionProviderDispatch, Session};
+use ort::{execution_providers::ExecutionProviderDispatch, session::Session};
 use tokenizers::Tokenizer;
 
 use crate::{models::sparse::SparseModel, TokenizerFiles, DEFAULT_CACHE_DIR};
diff --git a/src/text_embedding/impl.rs b/src/text_embedding/impl.rs
index 16d3508..6caa170 100644
--- a/src/text_embedding/impl.rs
+++ b/src/text_embedding/impl.rs
@@ -9,12 +9,18 @@ use crate::{
     Embedding, EmbeddingModel, EmbeddingOutput, ModelInfo, QuantizationMode, SingleBatchOutput,
 };
 #[cfg(feature = "online")]
+use anyhow::Context;
+use anyhow::Result;
+#[cfg(feature = "online")]
 use hf_hub::{
     api::sync::{ApiBuilder, ApiRepo},
     Cache,
 };
 use ndarray::Array;
-use ort::{GraphOptimizationLevel, Session, Value};
+use ort::{
+    session::{builder::GraphOptimizationLevel, Session},
+    value::Value,
+};
 use rayon::{
     iter::{FromParallelIterator, ParallelIterator},
     slice::ParallelSlice,
@@ -36,7 +42,7 @@ impl TextEmbedding {
     ///
     /// Uses the total number of CPUs available as the number of intra-threads
     #[cfg(feature = "online")]
-    pub fn try_new(options: InitOptions) -> anyhow::Result<Self> {
+    pub fn try_new(options: InitOptions) -> Result<Self> {
         let InitOptions {
             model_name,
             execution_providers,
@@ -58,7 +64,7 @@ impl TextEmbedding {
         let model_file_name = &model_info.model_file;
         let model_file_reference = model_repo
             .get(model_file_name)
-            .unwrap_or_else(|_| panic!("Failed to retrieve {} ", model_file_name));
+            .context(format!("Failed to retrieve {}", model_file_name))?;
 
         // TODO: If more models need .onnx_data, implement a better way to handle this
         // Probably by adding `additional_files` field in the `ModelInfo` struct
@@ -94,7 +100,7 @@ impl TextEmbedding {
     pub fn try_new_from_user_defined(
         model: UserDefinedEmbeddingModel,
         options: InitOptionsUserDefined,
-    ) -> anyhow::Result<Self> {
+    ) -> Result<Self> {
         let InitOptionsUserDefined {
             execution_providers,
             max_length,
@@ -149,8 +155,7 @@ impl TextEmbedding {
         let cache = Cache::new(cache_dir);
         let api = ApiBuilder::from_cache(cache)
             .with_progress(show_download_progress)
-            .build()
-            .unwrap();
+            .build()?;
 
         let repo = api.model(model.to_string());
         Ok(repo)
@@ -162,7 +167,7 @@ impl TextEmbedding {
     }
 
     /// Get ModelInfo from EmbeddingModel
-    pub fn get_model_info(model: &EmbeddingModel) -> anyhow::Result<&ModelInfo<EmbeddingModel>> {
+    pub fn get_model_info(model: &EmbeddingModel) -> Result<&ModelInfo<EmbeddingModel>> {
         get_model_info(model).ok_or_else(|| {
             anyhow::Error::msg(format!(
                 "Model {model:?} not found. Please check if the model is supported \
@@ -197,7 +202,7 @@ impl TextEmbedding {
         &'e self,
         texts: Vec<S>,
         batch_size: Option<usize>,
-    ) -> anyhow::Result<EmbeddingOutput<'r, 's>>
+    ) -> Result<EmbeddingOutput<'r, 's>>
     where
         'e: 'r,
         'e: 's,
@@ -234,64 +239,63 @@ impl TextEmbedding {
                     anyhow::Error::msg(e.to_string()).context("Failed to encode the batch.")
                 })?;
 
-                // Extract the encoding length and batch size
-                let encoding_length = encodings[0].len();
-                let batch_size = batch.len();
-
-                let max_size = encoding_length * batch_size;
-
-                // Preallocate arrays with the maximum size
-                let mut ids_array = Vec::with_capacity(max_size);
-                let mut mask_array = Vec::with_capacity(max_size);
-                let mut typeids_array = Vec::with_capacity(max_size);
-
-                // Not using par_iter because the closure needs to be FnMut
-                encodings.iter().for_each(|encoding| {
-                    let ids = encoding.get_ids();
-                    let mask = encoding.get_attention_mask();
-                    let typeids = encoding.get_type_ids();
-
-                    // Extend the preallocated arrays with the current encoding
-                    // Requires the closure to be FnMut
-                    ids_array.extend(ids.iter().map(|x| *x as i64));
-                    mask_array.extend(mask.iter().map(|x| *x as i64));
-                    typeids_array.extend(typeids.iter().map(|x| *x as i64));
-                });
-
-                // Create CowArrays from vectors
-                let inputs_ids_array =
-                    Array::from_shape_vec((batch_size, encoding_length), ids_array)?;
-
-                let attention_mask_array =
-                    Array::from_shape_vec((batch_size, encoding_length), mask_array)?;
-
-                let token_type_ids_array =
-                    Array::from_shape_vec((batch_size, encoding_length), typeids_array)?;
-
-                let mut session_inputs = ort::inputs![
-                    "input_ids" => Value::from_array(inputs_ids_array)?,
-                    "attention_mask" => Value::from_array(attention_mask_array.view())?,
-                ]?;
-
-                if self.need_token_type_ids {
-                    session_inputs.push((
-                        "token_type_ids".into(),
-                        Value::from_array(token_type_ids_array)?.into(),
-                    ));
-                }
+            // Extract the encoding length and batch size
+            let encoding_length = encodings[0].len();
+            let batch_size = batch.len();
+
+            let max_size = encoding_length * batch_size;
+
+            // Preallocate arrays with the maximum size
+            let mut ids_array = Vec::with_capacity(max_size);
+            let mut mask_array = Vec::with_capacity(max_size);
+            let mut typeids_array = Vec::with_capacity(max_size);
+
+            // Not using par_iter because the closure needs to be FnMut
+            encodings.iter().for_each(|encoding| {
+                let ids = encoding.get_ids();
+                let mask = encoding.get_attention_mask();
+                let typeids = encoding.get_type_ids();
+
+                // Extend the preallocated arrays with the current encoding
+                // Requires the closure to be FnMut
+                ids_array.extend(ids.iter().map(|x| *x as i64));
+                mask_array.extend(mask.iter().map(|x| *x as i64));
+                typeids_array.extend(typeids.iter().map(|x| *x as i64));
+            });
+
+            // Create CowArrays from vectors
+            let inputs_ids_array = Array::from_shape_vec((batch_size, encoding_length), ids_array)?;
+
+            let attention_mask_array =
+                Array::from_shape_vec((batch_size, encoding_length), mask_array)?;
+
+            let token_type_ids_array =
+                Array::from_shape_vec((batch_size, encoding_length), typeids_array)?;
+
+            let mut session_inputs = ort::inputs![
+                "input_ids" => Value::from_array(inputs_ids_array)?,
+                "attention_mask" => Value::from_array(attention_mask_array.view())?,
+            ]?;
+
+            if self.need_token_type_ids {
+                session_inputs.push((
+                    "token_type_ids".into(),
+                    Value::from_array(token_type_ids_array)?.into(),
+                ));
+            }
 
-                Ok(
-                    // Package all the data required for post-processing (e.g. pooling)
-                    // into a SingleBatchOutput struct.
-                    SingleBatchOutput {
-                        session_outputs: self
-                            .session
-                            .run(session_inputs)
-                            .map_err(anyhow::Error::new)?,
-                        attention_mask_array,
-                    },
-                )
-            }))?;
+            Ok(
+                // Package all the data required for post-processing (e.g. pooling)
+                // into a SingleBatchOutput struct.
+                SingleBatchOutput {
+                    session_outputs: self
+                        .session
+                        .run(session_inputs)
+                        .map_err(anyhow::Error::new)?,
+                    attention_mask_array,
+                },
+            )
+        }))?;
 
         Ok(EmbeddingOutput::new(batches))
     }
@@ -311,7 +315,7 @@ impl TextEmbedding {
         &self,
         texts: Vec<S>,
         batch_size: Option<usize>,
-    ) -> anyhow::Result<Vec<Embedding>> {
+    ) -> Result<Vec<Embedding>> {
         let batches = self.transform(texts, batch_size)?;
 
         batches.export_with_transformer(output::transformer_with_precedence(
diff --git a/src/text_embedding/init.rs b/src/text_embedding/init.rs
index 2e85686..8490a9a 100644
--- a/src/text_embedding/init.rs
+++ b/src/text_embedding/init.rs
@@ -6,7 +6,7 @@ use crate::{
     pooling::Pooling,
     EmbeddingModel, QuantizationMode,
 };
-use ort::{ExecutionProviderDispatch, Session};
+use ort::{execution_providers::ExecutionProviderDispatch, session::Session};
 use std::{
     num::NonZero,
     path::{Path, PathBuf},
diff --git a/tests/embeddings.rs b/tests/embeddings.rs
index 0025c80..f8c8093 100644
--- a/tests/embeddings.rs
+++ b/tests/embeddings.rs
@@ -15,7 +15,7 @@ use fastembed::{
 };
 
 /// A small epsilon value for floating point comparisons.
-const EPS: f32 = 1e-4;
+const EPS: f32 = 1e-2;
 
 /// Precalculated embeddings for the supported models using #99
 /// (4f09b6842ce1fcfaf6362678afcad9a176e05304).
@@ -61,6 +61,7 @@ fn verify_embeddings(model: &EmbeddingModel, embeddings: &[Embedding]) -> Result
         EmbeddingModel::ParaphraseMLMiniLML12V2 => [-0.07795018, -0.059113946, -0.043668486, -0.1880083],
         EmbeddingModel::ParaphraseMLMiniLML12V2Q => [-0.07749095, -0.058981877, -0.043487836, -0.18775631],
         EmbeddingModel::ParaphraseMLMpnetBaseV2 => [0.39132136, 0.49490625, 0.65497226, 0.34237382],
+        EmbeddingModel::ClipVitB32 => [0.7057363, 1.3549932, 0.46823958, 0.52351093],
         _ => panic!("Model {model} not found. If you have just inserted this `EmbeddingModel` variant, please update the expected embeddings."),
     };
 
@@ -321,6 +322,7 @@ fn test_rerank() {
     });
 }
 
+#[ignore]
 #[test]
 fn test_user_defined_reranking_large_model() {
     // Setup model to download from Hugging Face
@@ -484,6 +486,63 @@ fn test_image_embedding_model() {
         });
 }
 
+#[test]
+#[ignore]
+fn test_nomic_embed_vision_v1_5() {
+    fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
+        let dot_product = a.iter().zip(b).map(|(x, y)| x * y).sum::<f32>();
+        let norm_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
+        let norm_b = b.iter().map(|x| x * x).sum::<f32>().sqrt();
+        dot_product / (norm_a * norm_b)
+    }
+
+    fn cosine_similarity_matrix(
+        embeddings_a: &[Vec<f32>],
+        embeddings_b: &[Vec<f32>],
+    ) -> Vec<Vec<f32>> {
+        embeddings_a
+            .iter()
+            .map(|a| {
+                embeddings_b
+                    .iter()
+                    .map(|b| cosine_similarity(a, b))
+                    .collect()
+            })
+            .collect()
+    }
+
+    // Test the NomicEmbedVisionV15 model specifically because it outputs a 3D tensor with a different
+    // output key ('last_hidden_state') compared to other models. This test ensures our tensor extraction
+    // logic can handle both standard output keys and this model's specific naming convention.
+    let image_model = ImageEmbedding::try_new(ImageInitOptions::new(
+        fastembed::ImageEmbeddingModel::NomicEmbedVisionV15,
+    ))
+    .unwrap();
+
+    // tests/assets/image_0.png is a blue cat
+    // tests/assets/image_1.png is a red cat
+    let images = vec!["tests/assets/image_0.png", "tests/assets/image_1.png"];
+    let image_embeddings = image_model.embed(images.clone(), None).unwrap();
+    assert_eq!(image_embeddings.len(), images.len());
+
+    let text_model = TextEmbedding::try_new(InitOptions::new(
+        fastembed::EmbeddingModel::NomicEmbedTextV15,
+    ))
+    .unwrap();
+    let texts = vec!["green cat", "blue cat", "red cat", "yellow cat", "dog"];
+    let text_embeddings = text_model.embed(texts.clone(), None).unwrap();
+
+    // Generate similarity matrix
+    let similarity_matrix = cosine_similarity_matrix(&text_embeddings, &image_embeddings);
+    // Print the similarity matrix with text labels
+    for (i, row) in similarity_matrix.iter().enumerate() {
+        println!("{}: {:?}", texts[i], row);
+    }
+
+    assert_eq!(text_embeddings.len(), texts.len());
+    assert_eq!(text_embeddings[0].len(), 768);
+}
+
 fn clean_cache(model_code: String) {
     let repo = Repo::model(model_code);
     let cache_dir = format!("{}/{}", DEFAULT_CACHE_DIR, repo.folder_name());