Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/lfx/src/lfx/_assets/component_index.json

Large diffs are not rendered by default.

33 changes: 25 additions & 8 deletions src/lfx/src/lfx/base/models/watsonx_constants.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,32 @@
from .model_metadata import create_model_metadata

# Granite Embedding models
WATSONX_EMBEDDING_MODELS_DETAILED = [
create_model_metadata(provider="IBM Watsonx", name="ibm/granite-embedding-125m-english", icon="IBMWatsonx"),
create_model_metadata(provider="IBM Watsonx", name="ibm/granite-embedding-278m-multilingual", icon="IBMWatsonx"),
create_model_metadata(provider="IBM Watsonx", name="ibm/granite-embedding-30m-english", icon="IBMWatsonx"),
create_model_metadata(provider="IBM Watsonx", name="ibm/granite-embedding-107m-multilingual", icon="IBMWatsonx"),
create_model_metadata(provider="IBM Watsonx", name="ibm/granite-embedding-30m-sparse", icon="IBMWatsonx"),
# Default watsonx.ai embedding models, used as the static fallback when the
# live model list cannot be fetched from the watsonx.ai API.
WATSONX_DEFAULT_EMBEDDING_MODELS = [
    create_model_metadata(provider="IBM Watsonx", name="sentence-transformers/all-minilm-l12-v2", icon="WatsonxAI"),
    create_model_metadata(provider="IBM Watsonx", name="ibm/slate-125m-english-rtrvr-v2", icon="WatsonxAI"),
    create_model_metadata(provider="IBM Watsonx", name="ibm/slate-30m-english-rtrvr-v2", icon="WatsonxAI"),
    create_model_metadata(provider="IBM Watsonx", name="intfloat/multilingual-e5-large", icon="WatsonxAI"),
]

WATSONX_EMBEDDING_MODEL_NAMES = [metadata["name"] for metadata in WATSONX_EMBEDDING_MODELS_DETAILED]



Copy link

Copilot AI Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary blank lines. There are three consecutive blank lines here, which violates PEP 8 style guide that recommends at most two blank lines between top-level definitions. Remove the extra blank lines.

Copilot uses AI. Check for mistakes.
# Plain model-name strings extracted from the default metadata entries above.
WATSONX_EMBEDDING_MODEL_NAMES = [entry["name"] for entry in WATSONX_DEFAULT_EMBEDDING_MODELS]

IBM_WATSONX_URLS = [
"https://us-south.ml.cloud.ibm.com",
Expand Down
76 changes: 64 additions & 12 deletions src/lfx/src/lfx/components/models_and_agents/embedding_model.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from typing import Any

from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames
from langchain_openai import OpenAIEmbeddings

from lfx.base.embeddings.model import LCEmbeddingsModel
from lfx.base.models.model_utils import get_ollama_models, is_valid_ollama_url
from lfx.base.models.openai_constants import OPENAI_EMBEDDING_MODEL_NAMES
from lfx.base.models.watsonx_constants import IBM_WATSONX_URLS, WATSONX_EMBEDDING_MODEL_NAMES
from lfx.base.models.watsonx_constants import IBM_WATSONX_URLS, WATSONX_DEFAULT_EMBEDDING_MODELS, WATSONX_EMBEDDING_MODEL_NAMES

Check failure on line 9 in src/lfx/src/lfx/components/models_and_agents/embedding_model.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.13)

Ruff (E501)

src/lfx/src/lfx/components/models_and_agents/embedding_model.py:9:121: E501 Line too long (127 > 120)

Check failure on line 9 in src/lfx/src/lfx/components/models_and_agents/embedding_model.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.13)

Ruff (F401)

src/lfx/src/lfx/components/models_and_agents/embedding_model.py:9:65: F401 `lfx.base.models.watsonx_constants.WATSONX_DEFAULT_EMBEDDING_MODELS` imported but unused
Copy link

Copilot AI Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Import of 'WATSONX_DEFAULT_EMBEDDING_MODELS' is not used.

Copilot uses AI. Check for mistakes.
from lfx.field_typing import Embeddings
from lfx.io import (
BoolInput,
Expand All @@ -19,6 +20,7 @@
from lfx.log.logger import logger
from lfx.schema.dotdict import dotdict
from lfx.utils.util import transform_localhost_url
import requests

Check failure on line 23 in src/lfx/src/lfx/components/models_and_agents/embedding_model.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.13)

Ruff (I001)

src/lfx/src/lfx/components/models_and_agents/embedding_model.py:1:1: I001 Import block is un-sorted or un-formatted
Copy link

Copilot AI Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Import statement import requests should follow PEP 8 convention and be placed at the top of the file with other imports. Currently it's placed after the local imports, which is inconsistent with the import ordering convention used in the codebase. It should be placed before the from imports from third-party libraries.

Copilot uses AI. Check for mistakes.

# Ollama API constants
HTTP_STATUS_OK = 200
Expand Down Expand Up @@ -77,6 +79,8 @@
options=OPENAI_EMBEDDING_MODEL_NAMES,
value=OPENAI_EMBEDDING_MODEL_NAMES[0],
info="Select the embedding model to use",
real_time_refresh=True,
refresh_button=True,
),
SecretStrInput(
name="api_key",
Expand Down Expand Up @@ -110,7 +114,38 @@
advanced=True,
info="Additional keyword arguments to pass to the model.",
),
IntInput(
name="truncate_input_tokens",
display_name="Truncate Input Tokens",
advanced=True,
value=200,
show=False,
),
BoolInput(
name="input_text",
display_name="Include the original text in the output",
value=True,
advanced=True,
show=False,
),
]
@staticmethod
def fetch_ibm_models(base_url: str) -> list[str]:
    """Fetch available embedding model IDs from the watsonx.ai API.

    Args:
        base_url: Base URL of the watsonx.ai deployment, e.g.
            ``https://us-south.ml.cloud.ibm.com``.

    Returns:
        A sorted list of unique model IDs. Falls back to the static
        WATSONX_EMBEDDING_MODEL_NAMES list when the request fails OR when
        the API returns no models, so callers may safely index ``[0]``
        (the result is never empty while the fallback list is non-empty).
    """
    try:
        endpoint = f"{base_url}/ml/v1/foundation_model_specs"
        params = {
            "version": "2024-09-16",
            # NOTE(review): language_model.py uses
            # "function_text_chat,!lifecycle_withdrawn" without the ":and"
            # suffix — confirm the correct filter syntax against the
            # watsonx.ai foundation_model_specs API documentation.
            "filters": "function_embedding,!lifecycle_withdrawn:and",
        }
        # NOTE(review): no auth headers are sent; verify this endpoint is
        # publicly accessible for the target deployments.
        response = requests.get(endpoint, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
        # Deduplicate via a set before sorting, in case the API repeats IDs.
        models = sorted({model["model_id"] for model in data.get("resources", [])})
    except Exception:  # noqa: BLE001
        logger.exception("Error fetching models")
        return WATSONX_EMBEDDING_MODEL_NAMES
    # Guard against a successful but empty response: callers index [0].
    return models or WATSONX_EMBEDDING_MODEL_NAMES

def build_embeddings(self) -> Embeddings:
provider = self.provider
Expand Down Expand Up @@ -188,15 +223,26 @@
msg = "Project ID is required for IBM watsonx.ai provider"
raise ValueError(msg)

from ibm_watsonx_ai import APIClient, Credentials

credentials = Credentials(
api_key=self.api_key,
url=base_url_ibm_watsonx or "https://us-south.ml.cloud.ibm.com",
)

api_client = APIClient(credentials)

params = {
"model_id": model,
"url": base_url_ibm_watsonx or "https://us-south.ml.cloud.ibm.com",
"apikey": api_key,
EmbedTextParamsMetaNames.TRUNCATE_INPUT_TOKENS: self.truncate_input_tokens,
EmbedTextParamsMetaNames.RETURN_OPTIONS: {"input_text": self.input_text},
}

params["project_id"] = project_id

return WatsonxEmbeddings(**params)
return WatsonxEmbeddings(
model_id=model,
params=params,
watsonx_client=api_client,
project_id=project_id,
)

msg = f"Unknown provider: {provider}"
raise ValueError(msg)
Expand All @@ -217,7 +263,8 @@
build_config["ollama_base_url"]["show"] = False
build_config["project_id"]["show"] = False
build_config["base_url_ibm_watsonx"]["show"] = False

build_config["truncate_input_tokens"]["show"] = False
build_config["input_text"]["show"] = False
elif field_value == "Ollama":
build_config["ollama_base_url"]["show"] = True

Expand All @@ -238,7 +285,8 @@
else:
build_config["model"]["options"] = []
build_config["model"]["value"] = ""

build_config["truncate_input_tokens"]["show"] = False
build_config["input_text"]["show"] = False
build_config["api_key"]["display_name"] = "API Key (Optional)"
build_config["api_key"]["required"] = False
build_config["api_key"]["show"] = False
Expand All @@ -247,16 +295,20 @@
build_config["base_url_ibm_watsonx"]["show"] = False

elif field_value == "IBM watsonx.ai":
build_config["model"]["options"] = WATSONX_EMBEDDING_MODEL_NAMES
build_config["model"]["value"] = WATSONX_EMBEDDING_MODEL_NAMES[0]
build_config["model"]["options"] = self.fetch_ibm_models(base_url=self.base_url_ibm_watsonx)
build_config["model"]["value"] = self.fetch_ibm_models(base_url=self.base_url_ibm_watsonx)[0]
Comment on lines +302 to +303
Copy link

Copilot AI Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fetch_ibm_models method is called twice with the same base_url parameter on consecutive lines. This results in duplicate API requests. Consider storing the result in a variable and reusing it:

ibm_models = self.fetch_ibm_models(base_url=self.base_url_ibm_watsonx)
build_config["model"]["options"] = ibm_models
build_config["model"]["value"] = ibm_models[0]

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential IndexError if fetch_ibm_models returns an empty list. The code accesses [0] without checking if the list is non-empty. Consider adding a check or providing a fallback value:

ibm_models = self.fetch_ibm_models(base_url=self.base_url_ibm_watsonx)
build_config["model"]["options"] = ibm_models
build_config["model"]["value"] = ibm_models[0] if ibm_models else WATSONX_EMBEDDING_MODEL_NAMES[0]

Copilot uses AI. Check for mistakes.
build_config["api_key"]["display_name"] = "IBM watsonx.ai API Key"
build_config["api_key"]["required"] = True
build_config["api_key"]["show"] = True
build_config["api_base"]["show"] = False
build_config["ollama_base_url"]["show"] = False
build_config["base_url_ibm_watsonx"]["show"] = True
build_config["project_id"]["show"] = True

build_config["truncate_input_tokens"]["show"] = True
build_config["input_text"]["show"] = True
elif field_name == "base_url_ibm_watsonx":
build_config["model"]["options"] = self.fetch_ibm_models(base_url=field_value)
build_config["model"]["value"] = self.fetch_ibm_models(base_url=field_value)[0]
Comment on lines +314 to +315
Copy link

Copilot AI Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fetch_ibm_models method is called twice with the same field_value parameter on consecutive lines. This results in duplicate API requests. Consider storing the result in a variable and reusing it:

ibm_models = self.fetch_ibm_models(base_url=field_value)
build_config["model"]["options"] = ibm_models
build_config["model"]["value"] = ibm_models[0]

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential IndexError if fetch_ibm_models returns an empty list. The code accesses [0] without checking if the list is non-empty. Consider adding a check or providing a fallback value:

ibm_models = self.fetch_ibm_models(base_url=field_value)
build_config["model"]["options"] = ibm_models
build_config["model"]["value"] = ibm_models[0] if ibm_models else WATSONX_EMBEDDING_MODEL_NAMES[0]

Copilot uses AI. Check for mistakes.
elif field_name == "ollama_base_url":
# # Refresh Ollama models when base URL changes
# if hasattr(self, "provider") and self.provider == "Ollama":
Expand Down
Loading