From 2cfd69293004fa04aff51568480987a72878b4c2 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Tue, 26 Aug 2025 09:35:50 -0700 Subject: [PATCH 1/4] fix: Avoid namespace collision for Astra --- .../langflow/components/datastax/__init__.py | 4 +- .../{astradb.py => astradb_vectorstore.py} | 0 .../starter_projects/Hybrid Search RAG.json | 2550 ++++++++--------- .../starter_projects/Vector Store RAG.json | 2235 +++++++-------- 4 files changed, 2378 insertions(+), 2411 deletions(-) rename src/backend/base/langflow/components/datastax/{astradb.py => astradb_vectorstore.py} (100%) diff --git a/src/backend/base/langflow/components/datastax/__init__.py b/src/backend/base/langflow/components/datastax/__init__.py index 30bf2951d3d2..d635a4b3a771 100644 --- a/src/backend/base/langflow/components/datastax/__init__.py +++ b/src/backend/base/langflow/components/datastax/__init__.py @@ -8,9 +8,9 @@ from .astra_assistant_manager import AstraAssistantManager from .astra_db import AstraDBChatMemory from .astra_vectorize import AstraVectorizeComponent - from .astradb import AstraDBVectorStoreComponent from .astradb_cql import AstraDBCQLToolComponent from .astradb_tool import AstraDBToolComponent + from .astradb_vectorstore import AstraDBVectorStoreComponent from .create_assistant import AssistantsCreateAssistant from .create_thread import AssistantsCreateThread from .dotenv import Dotenv @@ -29,7 +29,7 @@ "AstraDBCQLToolComponent": "astradb_cql", "AstraDBChatMemory": "astra_db", "AstraDBToolComponent": "astradb_tool", - "AstraDBVectorStoreComponent": "astradb", + "AstraDBVectorStoreComponent": "astradb_vectorstore", "AstraVectorizeComponent": "astra_vectorize", "Dotenv": "dotenv", "GetEnvVar": "getenvvar", diff --git a/src/backend/base/langflow/components/datastax/astradb.py b/src/backend/base/langflow/components/datastax/astradb_vectorstore.py similarity index 100% rename from src/backend/base/langflow/components/datastax/astradb.py rename to src/backend/base/langflow/components/datastax/astradb_vectorstore.py diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json index fa5621569d14..e390d2feb385 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json @@ -7,7 +7,7 @@ "data": { "sourceHandle": { "dataType": "ParserComponent", - "id": "ParserComponent-3Wxa2", + "id": "ParserComponent-0KvmM", "name": "parsed_text", "output_types": [ "Message" @@ -15,7 +15,7 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "ChatOutput-811h1", + "id": "ChatOutput-zViXc", "inputTypes": [ "Data", "DataFrame", @@ -24,56 +24,56 @@ "type": "other" } }, - "id": "reactflow__edge-ParserComponent-3Wxa2{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-3Wxa2œ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-811h1{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-811h1œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-ParserComponent-0KvmM{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-0KvmMœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-zViXc{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-zViXcœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", "selected": false, - "source": "ParserComponent-3Wxa2", - "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-3Wxa2œ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", - "target": "ChatOutput-811h1", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-811h1œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}" + "source": "ParserComponent-0KvmM", + "sourceHandle": "{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-0KvmMœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "target": "ChatOutput-zViXc", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-zViXcœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "ChatInput", - "id": "ChatInput-uZ7jn", - "name": "message", + "dataType": "LanguageModelComponent", + "id": "LanguageModelComponent-CRZxx", + "name": "text_output", "output_types": [ "Message" ] }, "targetHandle": { - "fieldName": "search_query", - "id": "AstraDB-7gXip", + "fieldName": "input_value", + "id": "StructuredOutput-AUzID", "inputTypes": [ "Message" ], - "type": "query" + "type": "str" } }, - "id": "reactflow__edge-ChatInput-uZ7jn{œdataTypeœ:œChatInputœ,œidœ:œChatInput-uZ7jnœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-7gXip{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-7gXipœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", + "id": "reactflow__edge-LanguageModelComponent-CRZxx{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-CRZxxœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-StructuredOutput-AUzID{œfieldNameœ:œinput_valueœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "ChatInput-uZ7jn", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-uZ7jnœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "AstraDB-7gXip", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-7gXipœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}" + "source": "LanguageModelComponent-CRZxx", + "sourceHandle": "{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-CRZxxœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}", + "target": "StructuredOutput-AUzID", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "AstraDB", - "id": "AstraDB-7gXip", - "name": "dataframe", + "dataType": "StructuredOutput", + "id": "StructuredOutput-AUzID", + "name": "structured_output", "output_types": [ - "DataFrame" + "Data" ] }, "targetHandle": { "fieldName": "input_data", - "id": "ParserComponent-3Wxa2", + "id": "ParserComponent-6wYbr", "inputTypes": [ "DataFrame", "Data" @@ -81,12 +81,12 @@ "type": "other" } }, - "id": "reactflow__edge-AstraDB-7gXip{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-7gXipœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-ParserComponent-3Wxa2{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-3Wxa2œ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-StructuredOutput-AUzID{œdataTypeœ:œStructuredOutputœ,œidœ:œStructuredOutput-AUzIDœ,œnameœ:œstructured_outputœ,œoutput_typesœ:[œDataœ]}-ParserComponent-6wYbr{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-6wYbrœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "selected": false, - "source": "AstraDB-7gXip", - "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-7gXipœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", - "target": "ParserComponent-3Wxa2", - "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-3Wxa2œ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" + "source": "StructuredOutput-AUzID", + "sourceHandle": "{œdataTypeœ:œStructuredOutputœ,œidœ:œStructuredOutput-AUzIDœ,œnameœ:œstructured_outputœ,œoutput_typesœ:[œDataœ]}", + "target": "ParserComponent-6wYbr", + "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-6wYbrœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" }, { "animated": false, @@ -94,90 +94,112 @@ "data": { "sourceHandle": { "dataType": "LanguageModelComponent", - "id": "LanguageModelComponent-NEQ8S", - "name": "text_output", + "id": "LanguageModelComponent-MD9V5", + "name": "model_output", + "output_types": [ + "LanguageModel" + ] + }, + "targetHandle": { + "fieldName": "llm", + "id": "StructuredOutput-AUzID", + "inputTypes": [ + "LanguageModel" + ], + "type": "other" + } + }, + "id": "reactflow__edge-LanguageModelComponent-MD9V5{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-MD9V5œ,œnameœ:œmodel_outputœ,œoutput_typesœ:[œLanguageModelœ]}-StructuredOutput-AUzID{œfieldNameœ:œllmœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œLanguageModelœ],œtypeœ:œotherœ}", + "selected": false, + "source": "LanguageModelComponent-MD9V5", + "sourceHandle": "{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-MD9V5œ,œnameœ:œmodel_outputœ,œoutput_typesœ:[œLanguageModelœ]}", + "target": "StructuredOutput-AUzID", + "targetHandle": "{œfieldNameœ:œllmœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œLanguageModelœ],œtypeœ:œotherœ}" + }, + { + "className": "", + "data": { + "sourceHandle": { + "dataType": "ParserComponent", + "id": "ParserComponent-6wYbr", + "name": "parsed_text", "output_types": [ "Message" ] }, "targetHandle": { - "fieldName": "input_value", - "id": "StructuredOutput-n8Y3t", + "fieldName": "lexical_terms", + "id": "AstraDB-93cal", "inputTypes": [ "Message" ], - "type": "str" + "type": "query" } }, - "id": "reactflow__edge-LanguageModelComponent-NEQ8S{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-NEQ8Sœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-StructuredOutput-n8Y3t{œfieldNameœ:œinput_valueœ,œidœ:œStructuredOutput-n8Y3tœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "selected": false, - "source": "LanguageModelComponent-NEQ8S", - "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-NEQ8Sœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", - "target": "StructuredOutput-n8Y3t", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œStructuredOutput-n8Y3tœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "id": "xy-edge__ParserComponent-6wYbr{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-6wYbrœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-AstraDB-93cal{œfieldNameœ:œlexical_termsœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", + "source": "ParserComponent-6wYbr", + "sourceHandle": "{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-6wYbrœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "target": "AstraDB-93cal", + "targetHandle": "{œfieldNameœ:œlexical_termsœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}" }, { - "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "StructuredOutput", - "id": "StructuredOutput-n8Y3t", - "name": "structured_output", + "dataType": "ChatInput", + "id": "ChatInput-2JUiB", + "name": "message", "output_types": [ - "Data" + "Message" ] }, "targetHandle": { - "fieldName": "input_data", - "id": "ParserComponent-Kb474", + "fieldName": "search_query", + "id": "AstraDB-93cal", "inputTypes": [ - "DataFrame", - "Data" + "Message" ], - "type": "other" + "type": "query" } }, - "id": "reactflow__edge-StructuredOutput-n8Y3t{œdataTypeœ:œStructuredOutputœ,œidœ:œStructuredOutput-n8Y3tœ,œnameœ:œstructured_outputœ,œoutput_typesœ:[œDataœ]}-ParserComponent-Kb474{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-Kb474œ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", - "selected": false, - "source": "StructuredOutput-n8Y3t", - "sourceHandle": "{œdataTypeœ: œStructuredOutputœ, œidœ: œStructuredOutput-n8Y3tœ, œnameœ: œstructured_outputœ, œoutput_typesœ: [œDataœ]}", - "target": "ParserComponent-Kb474", - "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-Kb474œ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" + "id": "xy-edge__ChatInput-2JUiB{œdataTypeœ:œChatInputœ,œidœ:œChatInput-2JUiBœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-93cal{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", + "source": "ChatInput-2JUiB", + "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-2JUiBœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "target": "AstraDB-93cal", + "targetHandle": "{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}" }, { - "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "LanguageModelComponent", - "id": "LanguageModelComponent-pB4iD", - "name": "model_output", + "dataType": "AstraDB", + "id": "AstraDB-93cal", + "name": "search_results", "output_types": [ - "LanguageModel" + "Data" ] }, "targetHandle": { - "fieldName": "llm", - "id": "StructuredOutput-n8Y3t", + "fieldName": "input_data", + "id": "ParserComponent-0KvmM", "inputTypes": [ - "LanguageModel" + "DataFrame", + "Data" ], "type": "other" } }, - "id": "reactflow__edge-LanguageModelComponent-pB4iD{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-pB4iDœ,œnameœ:œmodel_outputœ,œoutput_typesœ:[œLanguageModelœ]}-StructuredOutput-n8Y3t{œfieldNameœ:œllmœ,œidœ:œStructuredOutput-n8Y3tœ,œinputTypesœ:[œLanguageModelœ],œtypeœ:œotherœ}", - "selected": false, - "source": "LanguageModelComponent-pB4iD", - "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-pB4iDœ, œnameœ: œmodel_outputœ, œoutput_typesœ: [œLanguageModelœ]}", - "target": "StructuredOutput-n8Y3t", - "targetHandle": "{œfieldNameœ: œllmœ, œidœ: œStructuredOutput-n8Y3tœ, œinputTypesœ: [œLanguageModelœ], œtypeœ: œotherœ}" + "id": "xy-edge__AstraDB-93cal{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-93calœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-ParserComponent-0KvmM{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-0KvmMœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "source": "AstraDB-93cal", + "sourceHandle": "{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-93calœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}", + "target": "ParserComponent-0KvmM", + "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-0KvmMœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" } ], "nodes": [ { "data": { - "id": "ChatInput-uZ7jn", + "id": "ChatInput-2JUiB", "node": { "base_classes": [ "Message" @@ -487,7 +509,7 @@ "type": "ChatInput" }, "dragging": false, - "id": "ChatInput-uZ7jn", + "id": "ChatInput-2JUiB", "measured": { "height": 48, "width": 192 @@ -501,7 +523,7 @@ }, { "data": { - "id": "ParserComponent-Kb474", + "id": "ParserComponent-6wYbr", "node": { "base_classes": [ "Message" @@ -672,7 +694,7 @@ "type": "ParserComponent" }, "dragging": false, - "id": "ParserComponent-Kb474", + "id": "ParserComponent-6wYbr", "measured": { "height": 329, "width": 320 @@ -686,7 +708,7 @@ }, { "data": { - "id": "ChatOutput-811h1", + "id": "ChatOutput-zViXc", "node": { "base_classes": [ "Message" @@ -1000,7 +1022,7 @@ "type": "ChatOutput" }, "dragging": false, - "id": "ChatOutput-811h1", + "id": "ChatOutput-zViXc", "measured": { "height": 48, "width": 192 @@ -1014,7 +1036,7 @@ }, { "data": { - "id": "ParserComponent-3Wxa2", + "id": "ParserComponent-0KvmM", "node": { "base_classes": [ "Message" @@ -1185,7 +1207,7 @@ "type": "ParserComponent" }, "dragging": false, - "id": "ParserComponent-3Wxa2", + "id": "ParserComponent-0KvmM", "measured": { "height": 246, "width": 320 @@ -1199,72 +1221,39 @@ }, { "data": { - "id": "AstraDB-7gXip", + "id": "LanguageModelComponent-CRZxx", "node": { "base_classes": [ - "Data", - "DataFrame", - "VectorStore" + "LanguageModel", + "Message" ], "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Ingest and search documents in Astra DB", - "display_name": "Astra DB", - "documentation": "https://docs.datastax.com/en/langflow/astra-components.html", + "description": "Runs a language model given a specified provider. ", + "display_name": "Language Model", + "documentation": "", "edited": false, "field_order": [ - "token", - "environment", - "database_name", - "api_endpoint", - "keyspace", - "collection_name", - "embedding_model", - "ingest_data", - "search_query", - "should_cache_vector_store", - "search_method", - "reranker", - "lexical_terms", - "number_of_results", - "search_type", - "search_score_threshold", - "advanced_search_filter", - "autodetect_collection", - "content_field", - "deletion_field", - "ignore_invalid_documents", - "astradb_vectorstore_kwargs" + "provider", + "model_name", + "api_key", + "input_value", + "system_message", + "stream", + "temperature" ], "frozen": false, - "icon": "AstraDB", + "icon": "brain-circuit", + "last_updated": "2025-08-26T16:33:20.961Z", "legacy": false, - "lf_version": "1.4.3", "metadata": { - "code_hash": "23fbe9daca09", - "dependencies": { - "dependencies": [ - { - "name": "astrapy", - "version": "2.0.1" - }, - { - "name": "langchain_astradb", - "version": "0.6.0" - }, - { - "name": "langchain_core", - "version": "0.3.72" - }, - { - "name": "langflow", - "version": null - } - ], - "total_dependencies": 4 - }, - "module": "langflow.components.datastax.astradb.AstraDBVectorStoreComponent" + "keywords": [ + "model", + "llm", + "language model", + "large language model" + ] }, "minimized": false, "output_types": [], @@ -1272,124 +1261,57 @@ { "allows_loop": false, "cache": true, - "display_name": "Search Results", - "group_outputs": false, - "method": "search_documents", - "name": "search_results", - "selected": "Data", - "tool_mode": true, - "types": [ - "Data" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "DataFrame", + "display_name": "Model Response", "group_outputs": false, - "method": "as_dataframe", - "name": "dataframe", - "selected": "DataFrame", + "method": "text_response", + "name": "text_output", + "options": null, + "required_inputs": null, + "selected": "Message", "tool_mode": true, "types": [ - "DataFrame" + "Message" ], "value": "__UNDEFINED__" }, { "allows_loop": false, "cache": true, - "display_name": "Vector Store Connection", + "display_name": "Language Model", "group_outputs": false, - "hidden": true, - "method": "as_vector_store", - "name": "vectorstoreconnection", - "selected": "VectorStore", + "method": "build_model", + "name": "model_output", + "options": null, + "required_inputs": null, + "selected": "LanguageModel", "tool_mode": true, "types": [ - "VectorStore" + "LanguageModel" ], "value": "__UNDEFINED__" } ], "pinned": false, + "priority": 0, "template": { "_type": "Component", - "advanced_search_filter": { - "_input_type": "NestedDictInput", - "advanced": true, - "display_name": "Search Metadata Filter", - "dynamic": false, - "info": "Optional dictionary of filters to apply to the search query.", - "list": false, - "list_add_label": "Add More", - "name": "advanced_search_filter", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "NestedDict", - "value": {} - }, - "api_endpoint": { - "_input_type": "StrInput", + "api_key": { + "_input_type": "SecretStrInput", "advanced": false, - "display_name": "Astra DB API Endpoint", + "display_name": "OpenAI API Key", "dynamic": false, - "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.", - "list": false, - "list_add_label": "Add More", + "info": "Model Provider API key", + "input_types": [], "load_from_db": true, - "name": "api_endpoint", - "placeholder": "", - "required": false, - "show": false, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "ASTRA_DB_API_ENDPOINT" - }, - "astradb_vectorstore_kwargs": { - "_input_type": "NestedDictInput", - "advanced": true, - "display_name": "AstraDBVectorStore Parameters", - "dynamic": false, - "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.", - "list": false, - "list_add_label": "Add More", - "name": "astradb_vectorstore_kwargs", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "NestedDict", - "value": {} - }, - "autodetect_collection": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Autodetect Collection", - "dynamic": false, - "info": "Boolean flag to determine whether to autodetect the collection.", - "list": false, - "list_add_label": "Add More", - "name": "autodetect_collection", + "name": "api_key", + "password": true, "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "str", + "value": "OPENAI_API_KEY" }, "code": { "advanced": true, @@ -1407,337 +1329,88 @@ "show": true, "title_case": false, "type": "code", - "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "from typing import Any\n\nfrom langchain_anthropic import ChatAnthropic\nfrom langchain_google_genai import ChatGoogleGenerativeAI\nfrom langchain_openai import ChatOpenAI\n\nfrom langflow.base.models.anthropic_constants import ANTHROPIC_MODELS\nfrom langflow.base.models.google_generative_ai_constants import GOOGLE_GENERATIVE_AI_MODELS\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_CHAT_MODEL_NAMES, OPENAI_REASONING_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageInput, MultilineInput, SecretStrInput, SliderInput\nfrom langflow.schema.dotdict import dotdict\n\n\nclass LanguageModelComponent(LCModelComponent):\n display_name = \"Language Model\"\n description = \"Runs a language model given a specified provider.\"\n documentation: str = \"https://docs.langflow.org/components-models\"\n icon = \"brain-circuit\"\n category = \"models\"\n priority = 0 # Set priority to 0 to make it appear first\n\n inputs = [\n DropdownInput(\n name=\"provider\",\n display_name=\"Model Provider\",\n options=[\"OpenAI\", \"Anthropic\", \"Google\"],\n value=\"OpenAI\",\n info=\"Select the model provider\",\n real_time_refresh=True,\n options_metadata=[{\"icon\": \"OpenAI\"}, {\"icon\": \"Anthropic\"}, {\"icon\": \"GoogleGenerativeAI\"}],\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES,\n value=OPENAI_CHAT_MODEL_NAMES[0],\n info=\"Select the model to use\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"Model Provider API key\",\n required=False,\n show=True,\n real_time_refresh=True,\n ),\n MessageInput(\n name=\"input_value\",\n display_name=\"Input\",\n info=\"The input text to send to the model\",\n ),\n MultilineInput(\n name=\"system_message\",\n display_name=\"System Message\",\n info=\"A system message that helps set the behavior of the assistant\",\n advanced=False,\n ),\n BoolInput(\n name=\"stream\",\n display_name=\"Stream\",\n info=\"Whether to stream the response\",\n value=False,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n info=\"Controls randomness in responses\",\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n ]\n\n def build_model(self) -> LanguageModel:\n provider = self.provider\n model_name = self.model_name\n temperature = self.temperature\n stream = self.stream\n\n if provider == \"OpenAI\":\n if not self.api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n\n if model_name in OPENAI_REASONING_MODEL_NAMES:\n # reasoning models do not support temperature (yet)\n temperature = None\n\n return ChatOpenAI(\n model_name=model_name,\n temperature=temperature,\n streaming=stream,\n openai_api_key=self.api_key,\n )\n if provider == \"Anthropic\":\n if not self.api_key:\n msg = \"Anthropic API key is required when using Anthropic provider\"\n raise ValueError(msg)\n return ChatAnthropic(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n anthropic_api_key=self.api_key,\n )\n if provider == \"Google\":\n if not self.api_key:\n msg = \"Google API key is required when using Google provider\"\n raise ValueError(msg)\n return ChatGoogleGenerativeAI(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n google_api_key=self.api_key,\n )\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n if field_name == \"provider\":\n if field_value == \"OpenAI\":\n build_config[\"model_name\"][\"options\"] = OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES\n build_config[\"model_name\"][\"value\"] = OPENAI_CHAT_MODEL_NAMES[0]\n build_config[\"api_key\"][\"display_name\"] = \"OpenAI API Key\"\n elif field_value == \"Anthropic\":\n build_config[\"model_name\"][\"options\"] = ANTHROPIC_MODELS\n build_config[\"model_name\"][\"value\"] = ANTHROPIC_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Anthropic API Key\"\n elif field_value == \"Google\":\n build_config[\"model_name\"][\"options\"] = GOOGLE_GENERATIVE_AI_MODELS\n build_config[\"model_name\"][\"value\"] = GOOGLE_GENERATIVE_AI_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Google API Key\"\n elif field_name == \"model_name\" and field_value.startswith(\"o1\") and self.provider == \"OpenAI\":\n # Hide system_message for o1 models - currently unsupported\n if \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = False\n elif field_name == \"model_name\" and not field_value.startswith(\"o1\") and \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = True\n return build_config\n" }, - "collection_name": { - "_input_type": "DropdownInput", + "input_value": { + "_input_type": "MessageInput", "advanced": false, - "combobox": true, - "dialog_inputs": { - "fields": { - "data": { - "node": { - "description": "Please allow several seconds for creation to complete.", - "display_name": "Create new collection", - "field_order": [ - "01_new_collection_name", - "02_embedding_generation_provider", - "03_embedding_generation_model", - "04_dimension" - ], - "name": "create_collection", - "template": { - "01_new_collection_name": { - "_input_type": "StrInput", - "advanced": false, - "display_name": "Name", - "dynamic": false, - "info": "Name of the new collection to create in Astra DB.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "new_collection_name", - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "02_embedding_generation_provider": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Embedding generation method", - "dynamic": false, - "helper_text": "To create collections with more embedding provider options, go to your database in Astra DB.", - "info": "Provider to use for generating embeddings.", - "name": "embedding_generation_provider", - "options": [ - "Bring your own", - "Nvidia" - ], - "options_metadata": [ - { - "icon": "vectorstores" - }, - { - "icon": "NVIDIA" - } - ], - "placeholder": "", - "real_time_refresh": true, - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "03_embedding_generation_model": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Embedding model", - "dynamic": false, - "info": "Model to use for generating embeddings.", - "name": "embedding_generation_model", - "options": [], - "options_metadata": [], - "placeholder": null, - "readonly": "", - "real_time_refresh": true, - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": null - }, - "04_dimension": { - "_input_type": "IntInput", - "advanced": false, - "display_name": "Dimensions", - "dynamic": false, - "info": "Dimensions of the embeddings to generate.", - "list": false, - "list_add_label": "Add More", - "name": "dimension", - "placeholder": 1024, - "readonly": true, - "required": "", - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 1024 - } - } - } - } - }, - "functionality": "create" - }, - "display_name": "Collection", - "dynamic": false, - "info": "The name of the collection within Astra DB where the vectors will be stored.", - "load_from_db": false, - "name": "collection_name", - "options": [], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "refresh_button": true, - "required": true, - "show": false, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "content_field": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Content Field", + "display_name": "Input", "dynamic": false, - "info": "Field to use as the text content field for the vector store.", + "info": "The input text to send to the model", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "content_field", + "name": "input_value", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "" + "value": "You are an AI system designed to extract structured information from unstructured text.Given the input_text, return a JSON object with predefined keys based on the expected structure.Extract values accurately and format them according to the specified type (e.g., string, integer, float, date).If a value is missing or cannot be determined, return a default (e.g., null, 0, or 'N/A').If multiple instances of the expected structure exist within the input_text, stream each as a separate JSON object." }, - "database_name": { + "model_name": { "_input_type": "DropdownInput", "advanced": false, - "combobox": true, - "dialog_inputs": { - "fields": { - "data": { - "node": { - "description": "Please allow several minutes for creation to complete.", - "display_name": "Create new database", - "field_order": [ - "01_new_database_name", - "02_cloud_provider", - "03_region" - ], - "name": "create_database", - "template": { - "01_new_database_name": { - "_input_type": "StrInput", - "advanced": false, - "display_name": "Name", - "dynamic": false, - "info": "Name of the new database to create in Astra DB.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "new_database_name", - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "02_cloud_provider": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Cloud provider", - "dynamic": false, - "info": "Cloud provider for the new database.", - "name": "cloud_provider", - "options": [ - "Amazon Web Services", - "Google Cloud Platform", - "Microsoft Azure" - ], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "03_region": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Region", - "dynamic": false, - "info": "Region for the new database.", - "name": "region", - "options": [], - "options_metadata": [], - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - } - } - } - } - }, - "functionality": "create" - }, - "display_name": "Database", + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model Name", "dynamic": false, - "info": "The Database name for the Astra DB instance.", - "load_from_db": false, - "name": "database_name", - "options": [], - "options_metadata": [ - { - "api_endpoint": "https://5b8bb22c-4a38-4f0a-865c-a18ed7590bd1-us-east-2.apps.astra.datastax.com", - "collections": 5, - "keyspaces": [ - "default_keyspace", - "samples_dataflow" - ], - "org_id": "260f986d-e65c-4f05-94a3-7cebfcb867a3", - "status": null - } + "info": "Select the model to use", + "name": "model_name", + "options": [ + "gpt-4o-mini", + "gpt-4o", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.5-preview", + "gpt-4-turbo", + "gpt-4-turbo-preview", + "gpt-4", + "gpt-3.5-turbo" ], - "placeholder": "", - "real_time_refresh": true, - "refresh_button": true, - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "deletion_field": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Deletion Based On Field", - "dynamic": false, - "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "deletion_field", + "options_metadata": [], "placeholder": "", "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "" - }, - "embedding_model": { - "_input_type": "HandleInput", - "advanced": false, - "display_name": "Embedding Model", - "dynamic": false, - "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.", - "input_types": [ - "Embeddings" - ], - "list": false, - "list_add_label": "Add More", - "name": "embedding_model", - "placeholder": "", - "required": false, - "show": false, - "title_case": false, - "trace_as_metadata": true, - "type": "other", - "value": "" + "value": "gpt-4o-mini" }, - "environment": { + "provider": { "_input_type": "DropdownInput", - "advanced": true, - "combobox": true, + "advanced": false, + "combobox": false, "dialog_inputs": {}, - "display_name": "Environment", + "display_name": "Model Provider", "dynamic": false, - "info": "The environment for the Astra DB API Endpoint.", - "name": "environment", + "info": "Select the model provider", + "name": "provider", "options": [ - "prod", - "test", - "dev" + "OpenAI", + "Anthropic", + "Google" + ], + "options_metadata": [ + { + "icon": "OpenAI" + }, + { + "icon": "Anthropic" + }, + { + "icon": "Google" + } ], - "options_metadata": [], "placeholder": "", "real_time_refresh": true, "required": false, @@ -1747,17 +1420,17 @@ "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "prod" + "value": "OpenAI" }, - "ignore_invalid_documents": { + "stream": { "_input_type": "BoolInput", "advanced": true, - "display_name": "Ignore Invalid Documents", + "display_name": "Stream", "dynamic": false, - "info": "Boolean flag to determine whether to ignore invalid documents at runtime.", + "info": "Whether to stream the response", "list": false, "list_add_label": "Add More", - "name": "ignore_invalid_documents", + "name": "stream", "placeholder": "", "required": false, "show": true, @@ -1767,140 +1440,269 @@ "type": "bool", "value": false }, - "ingest_data": { - "_input_type": "HandleInput", - "advanced": false, - "display_name": "Ingest Data", + "system_message": { + "_input_type": "MultilineInput", + "advanced": true, + "copy_field": false, + "display_name": "System Message", "dynamic": false, - "info": "", + "info": "A system message that helps set the behavior of the assistant", "input_types": [ - "Data", - "DataFrame" + "Message" ], - "list": true, + "list": false, "list_add_label": "Add More", - "name": "ingest_data", + "load_from_db": false, + "multiline": true, + "name": "system_message", "placeholder": "", "required": false, "show": true, "title_case": false, + "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "other", + "type": "str", "value": "" }, - "keyspace": { - "_input_type": "DropdownInput", + "temperature": { + "_input_type": "SliderInput", "advanced": true, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Keyspace", + "display_name": "Temperature", "dynamic": false, - "info": "Optional keyspace within Astra DB to use for the collection.", - "load_from_db": false, - "name": "keyspace", - "options": [], - "options_metadata": [], + "info": "Controls randomness in responses", + "max_label": "", + "max_label_icon": "", + "min_label": "", + "min_label_icon": "", + "name": "temperature", "placeholder": "", - "real_time_refresh": true, + "range_spec": { + "max": 1, + "min": 0, + "step": 0.01, + "step_type": "float" + }, "required": false, "show": true, + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, "title_case": false, - "toggle": false, "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "default_keyspace" + "type": "slider", + "value": 0.1 + } + }, + "tool_mode": false + }, + "selected_output": "text_output", + "showNode": true, + "type": "LanguageModelComponent" + }, + "dragging": false, + "id": "LanguageModelComponent-CRZxx", + "measured": { + "height": 451, + "width": 320 + }, + "position": { + "x": 320.756607335245, + "y": 486.0770655861057 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "LanguageModelComponent-MD9V5", + "node": { + "base_classes": [ + "LanguageModel", + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Runs a language model given a specified provider. ", + "display_name": "Language Model", + "documentation": "", + "edited": false, + "field_order": [ + "provider", + "model_name", + "api_key", + "input_value", + "system_message", + "stream", + "temperature" + ], + "frozen": false, + "icon": "brain-circuit", + "last_updated": "2025-08-26T16:33:20.962Z", + "legacy": false, + "metadata": { + "keywords": [ + "model", + "llm", + "language model", + "large language model" + ] + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Model Response", + "group_outputs": false, + "method": "text_response", + "name": "text_output", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" }, - "lexical_terms": { - "_input_type": "QueryInput", + { + "allows_loop": false, + "cache": true, + "display_name": "Language Model", + "group_outputs": false, + "method": "build_model", + "name": "model_output", + "options": null, + "required_inputs": null, + "selected": "LanguageModel", + "tool_mode": true, + "types": [ + "LanguageModel" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "priority": 0, + "template": { + "_type": "Component", + "api_key": { + "_input_type": "SecretStrInput", "advanced": false, - "display_name": "Lexical Terms", + "display_name": "OpenAI API Key", "dynamic": false, - "info": "Add additional terms/keywords to augment search precision.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "lexical_terms", - "placeholder": "Enter terms to search...", + "info": "Model Provider API key", + "input_types": [], + "load_from_db": true, + "name": "api_key", + "password": true, + "placeholder": "", + "real_time_refresh": true, "required": false, - "separator": " ", - "show": false, + "show": true, "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "query", - "value": "" + "type": "str", + "value": "OPENAI_API_KEY" }, - "number_of_results": { - "_input_type": "IntInput", + "code": { "advanced": true, - "display_name": "Number of Search Results", + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from typing import Any\n\nfrom langchain_anthropic import ChatAnthropic\nfrom langchain_google_genai import ChatGoogleGenerativeAI\nfrom langchain_openai import ChatOpenAI\n\nfrom langflow.base.models.anthropic_constants import ANTHROPIC_MODELS\nfrom langflow.base.models.google_generative_ai_constants import GOOGLE_GENERATIVE_AI_MODELS\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_CHAT_MODEL_NAMES, OPENAI_REASONING_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageInput, MultilineInput, SecretStrInput, SliderInput\nfrom langflow.schema.dotdict import dotdict\n\n\nclass LanguageModelComponent(LCModelComponent):\n display_name = \"Language Model\"\n description = \"Runs a language model given a specified provider.\"\n documentation: str = \"https://docs.langflow.org/components-models\"\n icon = \"brain-circuit\"\n category = \"models\"\n priority = 0 # Set priority to 0 to make it appear first\n\n inputs = [\n DropdownInput(\n name=\"provider\",\n display_name=\"Model Provider\",\n options=[\"OpenAI\", \"Anthropic\", \"Google\"],\n value=\"OpenAI\",\n info=\"Select the model provider\",\n real_time_refresh=True,\n options_metadata=[{\"icon\": \"OpenAI\"}, {\"icon\": \"Anthropic\"}, {\"icon\": \"GoogleGenerativeAI\"}],\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES,\n value=OPENAI_CHAT_MODEL_NAMES[0],\n info=\"Select the model to use\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"Model Provider API key\",\n required=False,\n show=True,\n real_time_refresh=True,\n ),\n MessageInput(\n name=\"input_value\",\n display_name=\"Input\",\n info=\"The input text to send to the model\",\n ),\n MultilineInput(\n name=\"system_message\",\n display_name=\"System Message\",\n info=\"A system message that helps set the behavior of the assistant\",\n advanced=False,\n ),\n BoolInput(\n name=\"stream\",\n display_name=\"Stream\",\n info=\"Whether to stream the response\",\n value=False,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n info=\"Controls randomness in responses\",\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n ]\n\n def build_model(self) -> LanguageModel:\n provider = self.provider\n model_name = self.model_name\n temperature = self.temperature\n stream = self.stream\n\n if provider == \"OpenAI\":\n if not self.api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n\n if model_name in OPENAI_REASONING_MODEL_NAMES:\n # reasoning models do not support temperature (yet)\n temperature = None\n\n return ChatOpenAI(\n model_name=model_name,\n temperature=temperature,\n streaming=stream,\n openai_api_key=self.api_key,\n )\n if provider == \"Anthropic\":\n if not self.api_key:\n msg = \"Anthropic API key is required when using Anthropic provider\"\n raise ValueError(msg)\n return ChatAnthropic(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n anthropic_api_key=self.api_key,\n )\n if provider == \"Google\":\n if not self.api_key:\n msg = \"Google API key is required when using Google provider\"\n raise ValueError(msg)\n return ChatGoogleGenerativeAI(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n google_api_key=self.api_key,\n )\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n if field_name == \"provider\":\n if field_value == \"OpenAI\":\n build_config[\"model_name\"][\"options\"] = OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES\n build_config[\"model_name\"][\"value\"] = OPENAI_CHAT_MODEL_NAMES[0]\n build_config[\"api_key\"][\"display_name\"] = \"OpenAI API Key\"\n elif field_value == \"Anthropic\":\n build_config[\"model_name\"][\"options\"] = ANTHROPIC_MODELS\n build_config[\"model_name\"][\"value\"] = ANTHROPIC_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Anthropic API Key\"\n elif field_value == \"Google\":\n build_config[\"model_name\"][\"options\"] = GOOGLE_GENERATIVE_AI_MODELS\n build_config[\"model_name\"][\"value\"] = GOOGLE_GENERATIVE_AI_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Google API Key\"\n elif field_name == \"model_name\" and field_value.startswith(\"o1\") and self.provider == \"OpenAI\":\n # Hide system_message for o1 models - currently unsupported\n if \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = False\n elif field_name == \"model_name\" and not field_value.startswith(\"o1\") and \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = True\n return build_config\n" + }, + "input_value": { + "_input_type": "MessageInput", + "advanced": false, + "display_name": "Input", "dynamic": false, - "info": "Number of search results to return.", + "info": "The input text to send to the model", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", - "name": "number_of_results", + "load_from_db": false, + "name": "input_value", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "int", - "value": 4 + "type": "str", + "value": "" }, - "reranker": { + "model_name": { "_input_type": "DropdownInput", "advanced": false, "combobox": false, "dialog_inputs": {}, - "display_name": "Reranker", - "dynamic": false, - "info": "Post-retrieval model that re-scores results for optimal relevance ranking.", - "load_from_db": false, - "name": "reranker", - "options": [], - "options_metadata": [ - { - "icon": "NVIDIA" - } + "display_name": "Model Name", + "dynamic": false, + "info": "Select the model to use", + "name": "model_name", + "options": [ + "gpt-4o-mini", + "gpt-4o", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.5-preview", + "gpt-4-turbo", + "gpt-4-turbo-preview", + "gpt-4", + "gpt-3.5-turbo" ], + "options_metadata": [], "placeholder": "", "required": false, "show": true, "title_case": false, - "toggle": true, - "toggle_disable": true, - "toggle_value": true, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "nvidia/llama-3.2-nv-rerankqa-1b-v2" + "value": "gpt-4o-mini" }, - "search_method": { + "provider": { "_input_type": "DropdownInput", - "advanced": true, + "advanced": false, "combobox": false, "dialog_inputs": {}, - "display_name": "Search Method", + "display_name": "Model Provider", "dynamic": false, - "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.", - "load_from_db": false, - "name": "search_method", + "info": "Select the model provider", + "name": "provider", "options": [ - "Hybrid Search", - "Vector Search" + "OpenAI", + "Anthropic", + "Google" ], "options_metadata": [ { - "icon": "SearchHybrid" + "icon": "OpenAI" }, { - "icon": "SearchVector" + "icon": "Anthropic" + }, + { + "icon": "Google" } ], "placeholder": "", @@ -1912,164 +1714,143 @@ "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "Hybrid Search" - }, - "search_query": { - "_input_type": "QueryInput", - "advanced": false, - "display_name": "Search Query", - "dynamic": false, - "info": "Enter a query to run a similarity search.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "search_query", - "placeholder": "Enter a query...", - "required": false, - "show": true, - "title_case": false, - "tool_mode": true, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "query", - "value": "" + "value": "OpenAI" }, - "search_score_threshold": { - "_input_type": "FloatInput", + "stream": { + "_input_type": "BoolInput", "advanced": true, - "display_name": "Search Score Threshold", + "display_name": "Stream", "dynamic": false, - "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')", + "info": "Whether to stream the response", "list": false, "list_add_label": "Add More", - "name": "search_score_threshold", + "name": "stream", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "float", - "value": 0 + "type": "bool", + "value": false }, - "search_type": { - "_input_type": "DropdownInput", + "system_message": { + "_input_type": "MultilineInput", "advanced": true, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Search Type", + "copy_field": false, + "display_name": "System Message", "dynamic": false, - "info": "Search type to use", - "name": "search_type", - "options": [ - "Similarity", - "Similarity with score threshold", - "MMR (Max Marginal Relevance)" + "info": "A system message that helps set the behavior of the assistant", + "input_types": [ + "Message" ], - "options_metadata": [], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "system_message", "placeholder": "", "required": false, "show": true, "title_case": false, - "toggle": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "Similarity" + "value": "" }, - "should_cache_vector_store": { - "_input_type": "BoolInput", + "temperature": { + "_input_type": "SliderInput", "advanced": true, - "display_name": "Cache Vector Store", + "display_name": "Temperature", "dynamic": false, - "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.", - "list": false, - "list_add_label": "Add More", - "name": "should_cache_vector_store", + "info": "Controls randomness in responses", + "max_label": "", + "max_label_icon": "", + "min_label": "", + "min_label_icon": "", + "name": "temperature", "placeholder": "", + "range_spec": { + "max": 1, + "min": 0, + "step": 0.01, + "step_type": "float" + }, "required": false, "show": true, + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, "title_case": false, "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "token": { - "_input_type": "SecretStrInput", - "advanced": false, - "display_name": "Astra DB Application Token", - "dynamic": false, - "info": "Authentication token for accessing Astra DB.", - "input_types": [], - "load_from_db": true, - "name": "token", - "password": true, - "placeholder": "", - "real_time_refresh": true, - "required": true, - "show": true, - "title_case": false, - "type": "str", - "value": "ASTRA_DB_APPLICATION_TOKEN" + "type": "slider", + "value": 0.1 } }, "tool_mode": false }, - "selected_output": "dataframe", + "selected_output": "model_output", "showNode": true, - "type": "AstraDB" + "type": "LanguageModelComponent" }, "dragging": false, - "id": "AstraDB-7gXip", + "id": "LanguageModelComponent-MD9V5", "measured": { - "height": 540, + "height": 451, "width": 320 }, "position": { - "x": 1548.269269836593, - "y": 162.5619344372189 + "x": 322.5971643968167, + "y": -36.64113990031162 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "LanguageModelComponent-NEQ8S", + "id": "StructuredOutput-AUzID", "node": { "base_classes": [ - "LanguageModel", - "Message" + "Data" ], "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Runs a language model given a specified provider. ", - "display_name": "Language Model", + "description": "Uses an LLM to generate structured data. Ideal for extraction and consistency.", + "display_name": "Structured Output", "documentation": "", "edited": false, "field_order": [ - "provider", - "model_name", - "api_key", + "llm", "input_value", - "system_message", - "stream", - "temperature" + "system_prompt", + "schema_name", + "output_schema" ], "frozen": false, - "icon": "brain-circuit", + "icon": "braces", "legacy": false, "metadata": { - "keywords": [ - "model", - "llm", - "language model", - "large language model" - ] + "code_hash": "ad2a6f4552c0", + "dependencies": { + "dependencies": [ + { + "name": "pydantic", + "version": "2.10.6" + }, + { + "name": "trustcall", + "version": "0.0.39" + }, + { + "name": "langflow", + "version": null + } + ], + "total_dependencies": 3 + }, + "module": "langflow.components.processing.structured_output.StructuredOutputComponent" }, "minimized": false, "output_types": [], @@ -2077,58 +1858,34 @@ { "allows_loop": false, "cache": true, - "display_name": "Model Response", + "display_name": "Structured Output", "group_outputs": false, - "method": "text_response", - "name": "text_output", - "options": null, - "required_inputs": null, - "selected": "Message", + "method": "build_structured_output", + "name": "structured_output", + "selected": "Data", "tool_mode": true, "types": [ - "Message" + "Data" ], "value": "__UNDEFINED__" }, { - "allows_loop": false, - "cache": true, - "display_name": "Language Model", - "group_outputs": false, - "method": "build_model", - "name": "model_output", - "options": null, - "required_inputs": null, - "selected": "LanguageModel", - "tool_mode": true, - "types": [ - "LanguageModel" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "priority": 0, - "template": { - "_type": "Component", - "api_key": { - "_input_type": "SecretStrInput", - "advanced": false, - "display_name": "OpenAI API Key", - "dynamic": false, - "info": "Model Provider API key", - "input_types": [], - "load_from_db": true, - "name": "api_key", - "password": true, - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "type": "str", - "value": "OPENAI_API_KEY" - }, + "allows_loop": false, + "cache": true, + "display_name": "Structured Output", + "group_outputs": false, + "method": "build_structured_dataframe", + "name": "dataframe_output", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", "code": { "advanced": true, "dynamic": true, @@ -2145,14 +1902,14 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import Any\n\nfrom langchain_anthropic import ChatAnthropic\nfrom langchain_google_genai import ChatGoogleGenerativeAI\nfrom langchain_openai import ChatOpenAI\n\nfrom langflow.base.models.anthropic_constants import ANTHROPIC_MODELS\nfrom langflow.base.models.google_generative_ai_constants import GOOGLE_GENERATIVE_AI_MODELS\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_CHAT_MODEL_NAMES, OPENAI_REASONING_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageInput, MultilineInput, SecretStrInput, SliderInput\nfrom langflow.schema.dotdict import dotdict\n\n\nclass LanguageModelComponent(LCModelComponent):\n display_name = \"Language Model\"\n description = \"Runs a language model given a specified provider.\"\n documentation: str = \"https://docs.langflow.org/components-models\"\n icon = \"brain-circuit\"\n category = \"models\"\n priority = 0 # Set priority to 0 to make it appear first\n\n inputs = [\n DropdownInput(\n name=\"provider\",\n display_name=\"Model Provider\",\n options=[\"OpenAI\", \"Anthropic\", \"Google\"],\n value=\"OpenAI\",\n info=\"Select the model provider\",\n real_time_refresh=True,\n options_metadata=[{\"icon\": \"OpenAI\"}, {\"icon\": \"Anthropic\"}, {\"icon\": \"GoogleGenerativeAI\"}],\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES,\n value=OPENAI_CHAT_MODEL_NAMES[0],\n info=\"Select the model to use\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"Model Provider API key\",\n required=False,\n show=True,\n real_time_refresh=True,\n ),\n MessageInput(\n name=\"input_value\",\n display_name=\"Input\",\n info=\"The input text to send to the model\",\n ),\n MultilineInput(\n name=\"system_message\",\n display_name=\"System Message\",\n info=\"A system message that helps set the behavior of the assistant\",\n advanced=False,\n ),\n BoolInput(\n name=\"stream\",\n display_name=\"Stream\",\n info=\"Whether to stream the response\",\n value=False,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n info=\"Controls randomness in responses\",\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n ]\n\n def build_model(self) -> LanguageModel:\n provider = self.provider\n model_name = self.model_name\n temperature = self.temperature\n stream = self.stream\n\n if provider == \"OpenAI\":\n if not self.api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n\n if model_name in OPENAI_REASONING_MODEL_NAMES:\n # reasoning models do not support temperature (yet)\n temperature = None\n\n return ChatOpenAI(\n model_name=model_name,\n temperature=temperature,\n streaming=stream,\n openai_api_key=self.api_key,\n )\n if provider == \"Anthropic\":\n if not self.api_key:\n msg = \"Anthropic API key is required when using Anthropic provider\"\n raise ValueError(msg)\n return ChatAnthropic(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n anthropic_api_key=self.api_key,\n )\n if provider == \"Google\":\n if not self.api_key:\n msg = \"Google API key is required when using Google provider\"\n raise ValueError(msg)\n return ChatGoogleGenerativeAI(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n google_api_key=self.api_key,\n )\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n if field_name == \"provider\":\n if field_value == \"OpenAI\":\n build_config[\"model_name\"][\"options\"] = OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES\n build_config[\"model_name\"][\"value\"] = OPENAI_CHAT_MODEL_NAMES[0]\n build_config[\"api_key\"][\"display_name\"] = \"OpenAI API Key\"\n elif field_value == \"Anthropic\":\n build_config[\"model_name\"][\"options\"] = ANTHROPIC_MODELS\n build_config[\"model_name\"][\"value\"] = ANTHROPIC_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Anthropic API Key\"\n elif field_value == \"Google\":\n build_config[\"model_name\"][\"options\"] = GOOGLE_GENERATIVE_AI_MODELS\n build_config[\"model_name\"][\"value\"] = GOOGLE_GENERATIVE_AI_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Google API Key\"\n elif field_name == \"model_name\" and field_value.startswith(\"o1\") and self.provider == \"OpenAI\":\n # Hide system_message for o1 models - currently unsupported\n if \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = False\n elif field_name == \"model_name\" and not field_value.startswith(\"o1\") and \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = True\n return build_config\n" + "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. Ideal for extraction and consistency.\"\n documentation: str = \"https://docs.langflow.org/components-processing#structured-output\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"dataframe_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if len(output) == 1:\n return Data(data=output[0])\n if len(output) > 1:\n # Multiple outputs - wrap them in a results container\n return Data(data={\"results\": output})\n return Data()\n\n def build_structured_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n data_list = [Data(data=output[0])] if len(output) == 1 else [Data(data=item) for item in output]\n\n return DataFrame(data_list)\n" }, "input_value": { - "_input_type": "MessageInput", + "_input_type": "MessageTextInput", "advanced": false, - "display_name": "Input", + "display_name": "Input Message", "dynamic": false, - "info": "The input text to send to the model", + "info": "The input message to the language model.", "input_types": [ "Message" ], @@ -2161,108 +1918,156 @@ "load_from_db": false, "name": "input_value", "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, - "tool_mode": false, + "tool_mode": true, "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "You are an AI system designed to extract structured information from unstructured text.Given the input_text, return a JSON object with predefined keys based on the expected structure.Extract values accurately and format them according to the specified type (e.g., string, integer, float, date).If a value is missing or cannot be determined, return a default (e.g., null, 0, or 'N/A').If multiple instances of the expected structure exist within the input_text, stream each as a separate JSON object." + "value": "" }, - "model_name": { - "_input_type": "DropdownInput", + "llm": { + "_input_type": "HandleInput", "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Model Name", + "display_name": "Language Model", "dynamic": false, - "info": "Select the model to use", - "name": "model_name", - "options": [ - "gpt-4o-mini", - "gpt-4o", - "gpt-4.1", - "gpt-4.1-mini", - "gpt-4.1-nano", - "gpt-4.5-preview", - "gpt-4-turbo", - "gpt-4-turbo-preview", - "gpt-4", - "gpt-3.5-turbo" + "info": "The language model to use to generate the structured output.", + "input_types": [ + "LanguageModel" ], - "options_metadata": [], + "list": false, + "list_add_label": "Add More", + "name": "llm", "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, - "toggle": false, - "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "gpt-4o-mini" + "type": "other", + "value": "" }, - "provider": { - "_input_type": "DropdownInput", + "output_schema": { + "_input_type": "TableInput", "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Model Provider", + "display_name": "Output Schema", "dynamic": false, - "info": "Select the model provider", - "name": "provider", - "options": [ - "OpenAI", - "Anthropic", - "Google" - ], - "options_metadata": [ - { - "icon": "OpenAI" - }, - { - "icon": "Anthropic" - }, - { - "icon": "Google" - } - ], + "info": "Define the structure and data types for the model's output.", + "is_list": true, + "list_add_label": "Add More", + "name": "output_schema", "placeholder": "", - "real_time_refresh": true, - "required": false, + "required": true, "show": true, + "table_icon": "Table", + "table_schema": { + "columns": [ + { + "default": "field", + "description": "Specify the name of the output field.", + "disable_edit": false, + "display_name": "Name", + "edit_mode": "inline", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "name", + "sortable": true, + "type": "str" + }, + { + "default": "description of field", + "description": "Describe the purpose of the output field.", + "disable_edit": false, + "display_name": "Description", + "edit_mode": "popover", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "description", + "sortable": true, + "type": "str" + }, + { + "default": "str", + "description": "Indicate the data type of the output field (e.g., str, int, float, bool, dict).", + "disable_edit": false, + "display_name": "Type", + "edit_mode": "inline", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "type", + "options": [ + "str", + "int", + "float", + "bool", + "dict" + ], + "sortable": true, + "type": "str" + }, + { + "default": false, + "description": "Set to True if this output field should be a list of the specified type.", + "disable_edit": false, + "display_name": "As List", + "edit_mode": "inline", + "filterable": true, + "formatter": "boolean", + "hidden": false, + "name": "multiple", + "sortable": true, + "type": "boolean" + } + ] + }, "title_case": false, - "toggle": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "OpenAI" + "trigger_icon": "Table", + "trigger_text": "Open table", + "type": "table", + "value": [ + { + "description": "description of field", + "multiple": "False", + "name": "field", + "type": "str" + } + ] }, - "stream": { - "_input_type": "BoolInput", + "schema_name": { + "_input_type": "MessageTextInput", "advanced": true, - "display_name": "Stream", + "display_name": "Schema Name", "dynamic": false, - "info": "Whether to stream the response", + "info": "Provide a name for the output data schema.", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", - "name": "stream", + "load_from_db": false, + "name": "schema_name", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": false + "type": "str", + "value": "" }, - "system_message": { + "system_prompt": { "_input_type": "MultilineInput", "advanced": true, "copy_field": false, - "display_name": "System Message", + "display_name": "Format Instructions", "dynamic": false, - "info": "A system message that helps set the behavior of the assistant", + "info": "The instructions to the language model for formatting the output.", "input_types": [ "Message" ], @@ -2270,99 +2075,130 @@ "list_add_label": "Add More", "load_from_db": false, "multiline": true, - "name": "system_message", + "name": "system_prompt", "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, "tool_mode": false, "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "" - }, - "temperature": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Temperature", - "dynamic": false, - "info": "Controls randomness in responses", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "temperature", - "placeholder": "", - "range_spec": { - "max": 1, - "min": 0, - "step": 0.01, - "step_type": "float" - }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, - "tool_mode": false, - "type": "slider", - "value": 0.1 + "value": "You are an AI that extracts structured JSON objects from unstructured text. Use a predefined schema with expected types (str, int, float, bool, dict). Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. Fill missing or ambiguous values with defaults: null for missing values. Remove exact duplicates but keep variations that have different field values. Always return valid JSON in the expected format, never throw errors. If multiple objects can be extracted, return them all in the structured format." } }, "tool_mode": false }, - "selected_output": "text_output", - "showNode": true, - "type": "LanguageModelComponent" + "selected_output": "structured_output", + "showNode": true, + "type": "StructuredOutput" + }, + "dragging": false, + "id": "StructuredOutput-AUzID", + "measured": { + "height": 349, + "width": 320 + }, + "position": { + "x": 735.3215653605321, + "y": 423.7970360460631 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "note-IkRDS", + "node": { + "description": "# Hybrid Search RAG\n\nHybrid search performs a vector similarity search and a lexical search, compares the results of both searches, and then returns the most relevant results overall.\n\n## Prerequisites\n\n* An [OpenAI API key](https://platform.openai.com/)\n* An [Astra DB Application Token](https://docs.datastax.com/en/astra-db-serverless/databases/create-database.html) for the Astra DB component.\n\n## Quickstart\n\n1. In the Astra DB component, add your Astra DB Application Token.\nThis connects Langflow to your Astra database.\n2. Select an Astra collection that is hybrid-enabled.\nFor more information, see the [Datastax documentation](https://docs.datastax.com/en/astra-db-serverless/databases/hybrid-search.html).\nThe connection appears between the Parser component and the Astra DB component when a vector database is connected.\n3. Ensure the **Lexical Terms** and **Parsed Text** ports are connected.\n4. Add your OpenAI API key in the **Language Model** model component.\n5. Open the Playground and ask a question, like \"What are the features of my data?\"", + "display_name": "", + "documentation": "", + "template": { + "backgroundColor": "blue" + } + }, + "type": "note" }, "dragging": false, - "id": "LanguageModelComponent-NEQ8S", + "id": "note-IkRDS", "measured": { - "height": 451, - "width": 320 + "height": 601, + "width": 575 }, "position": { - "x": 320.756607335245, - "y": 486.0770655861057 + "x": 816.3801044575429, + "y": -279.19595575780494 }, "selected": false, - "type": "genericNode" + "type": "noteNode" }, { "data": { - "id": "LanguageModelComponent-pB4iD", + "id": "AstraDB-93cal", "node": { "base_classes": [ - "LanguageModel", - "Message" + "Data", + "DataFrame", + "VectorStore" ], "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Runs a language model given a specified provider. ", - "display_name": "Language Model", - "documentation": "", - "edited": false, + "description": "Ingest and search documents in Astra DB", + "display_name": "Astra DB", + "documentation": "https://docs.datastax.com/en/langflow/astra-components.html", + "edited": true, "field_order": [ - "provider", - "model_name", - "api_key", - "input_value", - "system_message", - "stream", - "temperature" + "token", + "environment", + "database_name", + "api_endpoint", + "keyspace", + "collection_name", + "embedding_model", + "ingest_data", + "search_query", + "should_cache_vector_store", + "search_method", + "reranker", + "lexical_terms", + "number_of_results", + "search_type", + "search_score_threshold", + "advanced_search_filter", + "autodetect_collection", + "content_field", + "deletion_field", + "ignore_invalid_documents", + "astradb_vectorstore_kwargs" ], "frozen": false, - "icon": "brain-circuit", + "icon": "AstraDB", "legacy": false, "metadata": { - "keywords": [ - "model", - "llm", - "language model", - "large language model" - ] + "code_hash": "a48033d3ac1b", + "dependencies": { + "dependencies": [ + { + "name": "astrapy", + "version": "2.0.1" + }, + { + "name": "langchain_astradb", + "version": "0.6.0" + }, + { + "name": "langchain_core", + "version": "0.3.72" + }, + { + "name": "langflow", + "version": null + } + ], + "total_dependencies": 4 + }, + "module": "custom_components.astra_db" }, "minimized": false, "output_types": [], @@ -2370,156 +2206,452 @@ { "allows_loop": false, "cache": true, - "display_name": "Model Response", + "display_name": "Search Results", "group_outputs": false, - "method": "text_response", - "name": "text_output", + "hidden": null, + "method": "search_documents", + "name": "search_results", "options": null, "required_inputs": null, - "selected": "Message", + "selected": "Data", "tool_mode": true, "types": [ - "Message" + "Data" ], "value": "__UNDEFINED__" }, { "allows_loop": false, "cache": true, - "display_name": "Language Model", + "display_name": "DataFrame", "group_outputs": false, - "method": "build_model", - "name": "model_output", + "hidden": null, + "method": "as_dataframe", + "name": "dataframe", "options": null, "required_inputs": null, - "selected": "LanguageModel", + "selected": "DataFrame", "tool_mode": true, "types": [ - "LanguageModel" + "DataFrame" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Vector Store Connection", + "group_outputs": false, + "hidden": true, + "method": "as_vector_store", + "name": "vectorstoreconnection", + "options": null, + "required_inputs": null, + "selected": "VectorStore", + "tool_mode": true, + "types": [ + "VectorStore" ], "value": "__UNDEFINED__" } ], "pinned": false, - "priority": 0, "template": { "_type": "Component", - "api_key": { - "_input_type": "SecretStrInput", + "advanced_search_filter": { + "_input_type": "NestedDictInput", + "advanced": true, + "display_name": "Search Metadata Filter", + "dynamic": false, + "info": "Optional dictionary of filters to apply to the search query.", + "list": false, + "list_add_label": "Add More", + "name": "advanced_search_filter", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "NestedDict", + "value": {} + }, + "api_endpoint": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Astra DB API Endpoint", + "dynamic": false, + "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.", + "name": "api_endpoint", + "options": [], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "astradb_vectorstore_kwargs": { + "_input_type": "NestedDictInput", + "advanced": true, + "display_name": "AstraDBVectorStore Parameters", + "dynamic": false, + "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.", + "list": false, + "list_add_label": "Add More", + "name": "astradb_vectorstore_kwargs", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "NestedDict", + "value": {} + }, + "autodetect_collection": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Autodetect Collection", + "dynamic": false, + "info": "Boolean flag to determine whether to autodetect the collection.", + "list": false, + "list_add_label": "Add More", + "name": "autodetect_collection", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=True,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + }, + "collection_name": { + "_input_type": "DropdownInput", "advanced": false, - "display_name": "OpenAI API Key", + "combobox": true, + "dialog_inputs": { + "fields": { + "data": { + "node": { + "description": "Please allow several seconds for creation to complete.", + "display_name": "Create new collection", + "field_order": [ + "01_new_collection_name", + "02_embedding_generation_provider", + "03_embedding_generation_model", + "04_dimension" + ], + "name": "create_collection", + "template": { + "01_new_collection_name": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Name", + "dynamic": false, + "info": "Name of the new collection to create in Astra DB.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "new_collection_name", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "02_embedding_generation_provider": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Embedding generation method", + "dynamic": false, + "helper_text": "To create collections with more embedding provider options, go to your database in Astra DB", + "info": "Provider to use for generating embeddings.", + "name": "embedding_generation_provider", + "options": [], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "03_embedding_generation_model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Embedding model", + "dynamic": false, + "info": "Model to use for generating embeddings.", + "name": "embedding_generation_model", + "options": [], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "04_dimension": { + "_input_type": "IntInput", + "advanced": false, + "display_name": "Dimensions", + "dynamic": false, + "info": "Dimensions of the embeddings to generate.", + "list": false, + "list_add_label": "Add More", + "name": "dimension", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int" + } + } + } + } + }, + "functionality": "create" + }, + "display_name": "Collection", "dynamic": false, - "info": "Model Provider API key", - "input_types": [], - "load_from_db": true, - "name": "api_key", - "password": true, + "info": "The name of the collection within Astra DB where the vectors will be stored.", + "name": "collection_name", + "options": [], + "options_metadata": [], "placeholder": "", "real_time_refresh": true, - "required": false, - "show": true, + "refresh_button": true, + "required": true, + "show": false, "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, "type": "str", - "value": "OPENAI_API_KEY" + "value": "" }, - "code": { + "content_field": { + "_input_type": "StrInput", "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", + "display_name": "Content Field", + "dynamic": false, + "info": "Field to use as the text content field for the vector store.", "list": false, + "list_add_label": "Add More", "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, + "name": "content_field", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, - "type": "code", - "value": "from typing import Any\n\nfrom langchain_anthropic import ChatAnthropic\nfrom langchain_google_genai import ChatGoogleGenerativeAI\nfrom langchain_openai import ChatOpenAI\n\nfrom langflow.base.models.anthropic_constants import ANTHROPIC_MODELS\nfrom langflow.base.models.google_generative_ai_constants import GOOGLE_GENERATIVE_AI_MODELS\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_CHAT_MODEL_NAMES, OPENAI_REASONING_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageInput, MultilineInput, SecretStrInput, SliderInput\nfrom langflow.schema.dotdict import dotdict\n\n\nclass LanguageModelComponent(LCModelComponent):\n display_name = \"Language Model\"\n description = \"Runs a language model given a specified provider.\"\n documentation: str = \"https://docs.langflow.org/components-models\"\n icon = \"brain-circuit\"\n category = \"models\"\n priority = 0 # Set priority to 0 to make it appear first\n\n inputs = [\n DropdownInput(\n name=\"provider\",\n display_name=\"Model Provider\",\n options=[\"OpenAI\", \"Anthropic\", \"Google\"],\n value=\"OpenAI\",\n info=\"Select the model provider\",\n real_time_refresh=True,\n options_metadata=[{\"icon\": \"OpenAI\"}, {\"icon\": \"Anthropic\"}, {\"icon\": \"GoogleGenerativeAI\"}],\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES,\n value=OPENAI_CHAT_MODEL_NAMES[0],\n info=\"Select the model to use\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"Model Provider API key\",\n required=False,\n show=True,\n real_time_refresh=True,\n ),\n MessageInput(\n name=\"input_value\",\n display_name=\"Input\",\n info=\"The input text to send to the model\",\n ),\n MultilineInput(\n name=\"system_message\",\n display_name=\"System Message\",\n info=\"A system message that helps set the behavior of the assistant\",\n advanced=False,\n ),\n BoolInput(\n name=\"stream\",\n display_name=\"Stream\",\n info=\"Whether to stream the response\",\n value=False,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n info=\"Controls randomness in responses\",\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n ]\n\n def build_model(self) -> LanguageModel:\n provider = self.provider\n model_name = self.model_name\n temperature = self.temperature\n stream = self.stream\n\n if provider == \"OpenAI\":\n if not self.api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n\n if model_name in OPENAI_REASONING_MODEL_NAMES:\n # reasoning models do not support temperature (yet)\n temperature = None\n\n return ChatOpenAI(\n model_name=model_name,\n temperature=temperature,\n streaming=stream,\n openai_api_key=self.api_key,\n )\n if provider == \"Anthropic\":\n if not self.api_key:\n msg = \"Anthropic API key is required when using Anthropic provider\"\n raise ValueError(msg)\n return ChatAnthropic(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n anthropic_api_key=self.api_key,\n )\n if provider == \"Google\":\n if not self.api_key:\n msg = \"Google API key is required when using Google provider\"\n raise ValueError(msg)\n return ChatGoogleGenerativeAI(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n google_api_key=self.api_key,\n )\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n if field_name == \"provider\":\n if field_value == \"OpenAI\":\n build_config[\"model_name\"][\"options\"] = OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES\n build_config[\"model_name\"][\"value\"] = OPENAI_CHAT_MODEL_NAMES[0]\n build_config[\"api_key\"][\"display_name\"] = \"OpenAI API Key\"\n elif field_value == \"Anthropic\":\n build_config[\"model_name\"][\"options\"] = ANTHROPIC_MODELS\n build_config[\"model_name\"][\"value\"] = ANTHROPIC_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Anthropic API Key\"\n elif field_value == \"Google\":\n build_config[\"model_name\"][\"options\"] = GOOGLE_GENERATIVE_AI_MODELS\n build_config[\"model_name\"][\"value\"] = GOOGLE_GENERATIVE_AI_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Google API Key\"\n elif field_name == \"model_name\" and field_value.startswith(\"o1\") and self.provider == \"OpenAI\":\n # Hide system_message for o1 models - currently unsupported\n if \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = False\n elif field_name == \"model_name\" and not field_value.startswith(\"o1\") and \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = True\n return build_config\n" + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" }, - "input_value": { - "_input_type": "MessageInput", + "database_name": { + "_input_type": "DropdownInput", "advanced": false, - "display_name": "Input", + "combobox": true, + "dialog_inputs": { + "fields": { + "data": { + "node": { + "description": "Please allow several minutes for creation to complete.", + "display_name": "Create new database", + "field_order": [ + "01_new_database_name", + "02_cloud_provider", + "03_region" + ], + "name": "create_database", + "template": { + "01_new_database_name": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Name", + "dynamic": false, + "info": "Name of the new database to create in Astra DB.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "new_database_name", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "02_cloud_provider": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Cloud provider", + "dynamic": false, + "info": "Cloud provider for the new database.", + "name": "cloud_provider", + "options": [], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "03_region": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Region", + "dynamic": false, + "info": "Region for the new database.", + "name": "region", + "options": [], + "options_metadata": [], + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + } + } + } + } + }, + "functionality": "create" + }, + "display_name": "Database", "dynamic": false, - "info": "The input text to send to the model", - "input_types": [ - "Message" - ], + "info": "The Database name for the Astra DB instance.", + "name": "database_name", + "options": [], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "deletion_field": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Deletion Based On Field", + "dynamic": false, + "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.", "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "input_value", + "name": "deletion_field", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, "type": "str", "value": "" }, - "model_name": { - "_input_type": "DropdownInput", + "embedding_model": { + "_input_type": "HandleInput", "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Model Name", + "display_name": "Embedding Model", "dynamic": false, - "info": "Select the model to use", - "name": "model_name", - "options": [ - "gpt-4o-mini", - "gpt-4o", - "gpt-4.1", - "gpt-4.1-mini", - "gpt-4.1-nano", - "gpt-4.5-preview", - "gpt-4-turbo", - "gpt-4-turbo-preview", - "gpt-4", - "gpt-3.5-turbo" + "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.", + "input_types": [ + "Embeddings" ], - "options_metadata": [], + "list": false, + "list_add_label": "Add More", + "name": "embedding_model", "placeholder": "", "required": false, - "show": true, + "show": false, "title_case": false, - "toggle": false, - "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "gpt-4o-mini" + "type": "other", + "value": "" }, - "provider": { + "environment": { "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, + "advanced": true, + "combobox": true, "dialog_inputs": {}, - "display_name": "Model Provider", + "display_name": "Environment", "dynamic": false, - "info": "Select the model provider", - "name": "provider", + "info": "The environment for the Astra DB API Endpoint.", + "name": "environment", "options": [ - "OpenAI", - "Anthropic", - "Google" - ], - "options_metadata": [ - { - "icon": "OpenAI" - }, - { - "icon": "Anthropic" - }, - { - "icon": "Google" - } + "prod", + "test", + "dev" ], + "options_metadata": [], "placeholder": "", "real_time_refresh": true, "required": false, @@ -2529,17 +2661,17 @@ "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "OpenAI" + "value": "prod" }, - "stream": { + "ignore_invalid_documents": { "_input_type": "BoolInput", "advanced": true, - "display_name": "Stream", + "display_name": "Ignore Invalid Documents", "dynamic": false, - "info": "Whether to stream the response", + "info": "Boolean flag to determine whether to ignore invalid documents at runtime.", "list": false, "list_add_label": "Add More", - "name": "stream", + "name": "ignore_invalid_documents", "placeholder": "", "required": false, "show": true, @@ -2549,416 +2681,278 @@ "type": "bool", "value": false }, - "system_message": { - "_input_type": "MultilineInput", + "ingest_data": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Ingest Data", + "dynamic": false, + "info": "", + "input_types": [ + "Data", + "DataFrame" + ], + "list": true, + "list_add_label": "Add More", + "name": "ingest_data", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "keyspace": { + "_input_type": "DropdownInput", "advanced": true, - "copy_field": false, - "display_name": "System Message", + "combobox": false, + "dialog_inputs": {}, + "display_name": "Keyspace", "dynamic": false, - "info": "A system message that helps set the behavior of the assistant", + "info": "Optional keyspace within Astra DB to use for the collection.", + "name": "keyspace", + "options": [], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "lexical_terms": { + "_input_type": "QueryInput", + "advanced": false, + "display_name": "Lexical Terms", + "dynamic": false, + "info": "Add additional terms/keywords to augment search precision.", "input_types": [ "Message" ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "multiline": true, - "name": "system_message", - "placeholder": "", + "name": "lexical_terms", + "placeholder": "Enter terms to search...", "required": false, + "separator": " ", "show": true, "title_case": false, "tool_mode": false, "trace_as_input": true, "trace_as_metadata": true, - "type": "str", + "type": "query", "value": "" }, - "temperature": { - "_input_type": "SliderInput", + "number_of_results": { + "_input_type": "IntInput", "advanced": true, - "display_name": "Temperature", + "display_name": "Number of Search Results", "dynamic": false, - "info": "Controls randomness in responses", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "temperature", + "info": "Number of search results to return.", + "list": false, + "list_add_label": "Add More", + "name": "number_of_results", "placeholder": "", - "range_spec": { - "max": 1, - "min": 0, - "step": 0.01, - "step_type": "float" - }, "required": false, "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, "title_case": false, "tool_mode": false, - "type": "slider", - "value": 0.1 - } - }, - "tool_mode": false - }, - "selected_output": "model_output", - "showNode": true, - "type": "LanguageModelComponent" - }, - "dragging": false, - "id": "LanguageModelComponent-pB4iD", - "measured": { - "height": 451, - "width": 320 - }, - "position": { - "x": 322.5971643968167, - "y": -36.64113990031162 - }, - "selected": false, - "type": "genericNode" - }, - { - "data": { - "id": "StructuredOutput-n8Y3t", - "node": { - "base_classes": [ - "Data" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Uses an LLM to generate structured data. Ideal for extraction and consistency.", - "display_name": "Structured Output", - "documentation": "", - "edited": false, - "field_order": [ - "llm", - "input_value", - "system_prompt", - "schema_name", - "output_schema" - ], - "frozen": false, - "icon": "braces", - "legacy": false, - "metadata": { - "code_hash": "ad2a6f4552c0", - "dependencies": { - "dependencies": [ - { - "name": "pydantic", - "version": "2.10.6" - }, - { - "name": "trustcall", - "version": "0.0.39" - }, - { - "name": "langflow", - "version": null - } - ], - "total_dependencies": 3 + "trace_as_metadata": true, + "type": "int", + "value": 4 }, - "module": "langflow.components.processing.structured_output.StructuredOutputComponent" - }, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "Structured Output", - "group_outputs": false, - "method": "build_structured_output", - "name": "structured_output", - "selected": "Data", - "tool_mode": true, - "types": [ - "Data" - ], - "value": "__UNDEFINED__" + "reranker": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Reranker", + "dynamic": false, + "info": "Post-retrieval model that re-scores results for optimal relevance ranking.", + "name": "reranker", + "options": [], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": false, + "title_case": false, + "toggle": true, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" }, - { - "allows_loop": false, - "cache": true, - "display_name": "Structured Output", - "group_outputs": false, - "method": "build_structured_dataframe", - "name": "dataframe_output", - "selected": "DataFrame", - "tool_mode": true, - "types": [ - "DataFrame" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "template": { - "_type": "Component", - "code": { + "search_method": { + "_input_type": "DropdownInput", "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Search Method", + "dynamic": false, + "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.", + "name": "search_method", + "options": [ + "Hybrid Search", + "Vector Search" + ], + "options_metadata": [ + { + "icon": "SearchHybrid" + }, + { + "icon": "SearchVector" + } + ], "placeholder": "", - "required": true, + "real_time_refresh": true, + "required": false, "show": true, "title_case": false, - "type": "code", - "value": "from pydantic import BaseModel, Field, create_model\nfrom trustcall import create_extractor\n\nfrom langflow.base.models.chat_result import get_chat_result\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n HandleInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.table import EditMode\n\n\nclass StructuredOutputComponent(Component):\n display_name = \"Structured Output\"\n description = \"Uses an LLM to generate structured data. Ideal for extraction and consistency.\"\n documentation: str = \"https://docs.langflow.org/components-processing#structured-output\"\n name = \"StructuredOutput\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"llm\",\n display_name=\"Language Model\",\n info=\"The language model to use to generate the structured output.\",\n input_types=[\"LanguageModel\"],\n required=True,\n ),\n MultilineInput(\n name=\"input_value\",\n display_name=\"Input Message\",\n info=\"The input message to the language model.\",\n tool_mode=True,\n required=True,\n ),\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Format Instructions\",\n info=\"The instructions to the language model for formatting the output.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n required=True,\n advanced=True,\n ),\n MessageTextInput(\n name=\"schema_name\",\n display_name=\"Schema Name\",\n info=\"Provide a name for the output data schema.\",\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=\"Define the structure and data types for the model's output.\",\n required=True,\n # TODO: remove deault value\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n value=[\n {\n \"name\": \"field\",\n \"description\": \"description of field\",\n \"type\": \"str\",\n \"multiple\": \"False\",\n }\n ],\n ),\n ]\n\n outputs = [\n Output(\n name=\"structured_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_output\",\n ),\n Output(\n name=\"dataframe_output\",\n display_name=\"Structured Output\",\n method=\"build_structured_dataframe\",\n ),\n ]\n\n def build_structured_output_base(self):\n schema_name = self.schema_name or \"OutputModel\"\n\n if not hasattr(self.llm, \"with_structured_output\"):\n msg = \"Language model does not support structured output.\"\n raise TypeError(msg)\n if not self.output_schema:\n msg = \"Output schema cannot be empty\"\n raise ValueError(msg)\n\n output_model_ = build_model_from_schema(self.output_schema)\n\n output_model = create_model(\n schema_name,\n __doc__=f\"A list of {schema_name}.\",\n objects=(list[output_model_], Field(description=f\"A list of {schema_name}.\")), # type: ignore[valid-type]\n )\n\n try:\n llm_with_structured_output = create_extractor(self.llm, tools=[output_model])\n except NotImplementedError as exc:\n msg = f\"{self.llm.__class__.__name__} does not support structured output.\"\n raise TypeError(msg) from exc\n\n config_dict = {\n \"run_name\": self.display_name,\n \"project_name\": self.get_project_name(),\n \"callbacks\": self.get_langchain_callbacks(),\n }\n result = get_chat_result(\n runnable=llm_with_structured_output,\n system_message=self.system_prompt,\n input_value=self.input_value,\n config=config_dict,\n )\n\n # OPTIMIZATION NOTE: Simplified processing based on trustcall response structure\n # Handle non-dict responses (shouldn't happen with trustcall, but defensive)\n if not isinstance(result, dict):\n return result\n\n # Extract first response and convert BaseModel to dict\n responses = result.get(\"responses\", [])\n if not responses:\n return result\n\n # Convert BaseModel to dict (creates the \"objects\" key)\n first_response = responses[0]\n structured_data = first_response.model_dump() if isinstance(first_response, BaseModel) else first_response\n\n # Extract the objects array (guaranteed to exist due to our Pydantic model structure)\n return structured_data.get(\"objects\", structured_data)\n\n def build_structured_output(self) -> Data:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n if len(output) == 1:\n return Data(data=output[0])\n if len(output) > 1:\n # Multiple outputs - wrap them in a results container\n return Data(data={\"results\": output})\n return Data()\n\n def build_structured_dataframe(self) -> DataFrame:\n output = self.build_structured_output_base()\n if not isinstance(output, list) or not output:\n # handle empty or unexpected type case\n msg = \"No structured output returned\"\n raise ValueError(msg)\n data_list = [Data(data=output[0])] if len(output) == 1 else [Data(data=item) for item in output]\n\n return DataFrame(data_list)\n" + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Vector Search" }, - "input_value": { - "_input_type": "MessageTextInput", + "search_query": { + "_input_type": "QueryInput", "advanced": false, - "display_name": "Input Message", + "display_name": "Search Query", "dynamic": false, - "info": "The input message to the language model.", + "info": "Enter a query to run a similarity search.", "input_types": [ "Message" ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "input_value", - "placeholder": "", - "required": true, + "name": "search_query", + "placeholder": "Enter a query...", + "required": false, "show": true, "title_case": false, "tool_mode": true, "trace_as_input": true, "trace_as_metadata": true, - "type": "str", + "type": "query", "value": "" }, - "llm": { - "_input_type": "HandleInput", - "advanced": false, - "display_name": "Language Model", + "search_score_threshold": { + "_input_type": "FloatInput", + "advanced": true, + "display_name": "Search Score Threshold", "dynamic": false, - "info": "The language model to use to generate the structured output.", - "input_types": [ - "LanguageModel" - ], + "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')", "list": false, "list_add_label": "Add More", - "name": "llm", + "name": "search_score_threshold", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, + "tool_mode": false, "trace_as_metadata": true, - "type": "other", - "value": "" + "type": "float", + "value": 0 }, - "output_schema": { - "_input_type": "TableInput", - "advanced": false, - "display_name": "Output Schema", + "search_type": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Search Type", "dynamic": false, - "info": "Define the structure and data types for the model's output.", - "is_list": true, - "list_add_label": "Add More", - "name": "output_schema", + "info": "Search type to use", + "name": "search_type", + "options": [ + "Similarity", + "Similarity with score threshold", + "MMR (Max Marginal Relevance)" + ], + "options_metadata": [], "placeholder": "", - "required": true, + "required": false, "show": true, - "table_icon": "Table", - "table_schema": { - "columns": [ - { - "default": "field", - "description": "Specify the name of the output field.", - "disable_edit": false, - "display_name": "Name", - "edit_mode": "inline", - "filterable": true, - "formatter": "text", - "hidden": false, - "name": "name", - "sortable": true, - "type": "str" - }, - { - "default": "description of field", - "description": "Describe the purpose of the output field.", - "disable_edit": false, - "display_name": "Description", - "edit_mode": "popover", - "filterable": true, - "formatter": "text", - "hidden": false, - "name": "description", - "sortable": true, - "type": "str" - }, - { - "default": "str", - "description": "Indicate the data type of the output field (e.g., str, int, float, bool, dict).", - "disable_edit": false, - "display_name": "Type", - "edit_mode": "inline", - "filterable": true, - "formatter": "text", - "hidden": false, - "name": "type", - "options": [ - "str", - "int", - "float", - "bool", - "dict" - ], - "sortable": true, - "type": "str" - }, - { - "default": false, - "description": "Set to True if this output field should be a list of the specified type.", - "disable_edit": false, - "display_name": "As List", - "edit_mode": "inline", - "filterable": true, - "formatter": "boolean", - "hidden": false, - "name": "multiple", - "sortable": true, - "type": "boolean" - } - ] - }, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, - "trigger_icon": "Table", - "trigger_text": "Open table", - "type": "table", - "value": [ - { - "description": "description of field", - "multiple": "False", - "name": "field", - "type": "str" - } - ] + "type": "str", + "value": "Similarity" }, - "schema_name": { - "_input_type": "MessageTextInput", + "should_cache_vector_store": { + "_input_type": "BoolInput", "advanced": true, - "display_name": "Schema Name", + "display_name": "Cache Vector Store", "dynamic": false, - "info": "Provide a name for the output data schema.", - "input_types": [ - "Message" - ], + "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.", "list": false, "list_add_label": "Add More", - "load_from_db": false, - "name": "schema_name", + "name": "should_cache_vector_store", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "str", - "value": "" + "type": "bool", + "value": true }, - "system_prompt": { - "_input_type": "MultilineInput", - "advanced": true, - "copy_field": false, - "display_name": "Format Instructions", + "token": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Astra DB Application Token", "dynamic": false, - "info": "The instructions to the language model for formatting the output.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "multiline": true, - "name": "system_prompt", + "info": "Authentication token for accessing Astra DB.", + "input_types": [], + "load_from_db": true, + "name": "token", + "password": true, "placeholder": "", + "real_time_refresh": true, "required": true, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, "type": "str", - "value": "You are an AI that extracts structured JSON objects from unstructured text. Use a predefined schema with expected types (str, int, float, bool, dict). Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. Fill missing or ambiguous values with defaults: null for missing values. Remove exact duplicates but keep variations that have different field values. Always return valid JSON in the expected format, never throw errors. If multiple objects can be extracted, return them all in the structured format." + "value": "ASTRA_DB_APPLICATION_TOKEN" } }, "tool_mode": false }, + "selected_output": "search_results", "showNode": true, - "type": "StructuredOutput" + "type": "AstraDB" }, "dragging": false, - "id": "StructuredOutput-n8Y3t", + "id": "AstraDB-93cal", "measured": { - "height": 349, + "height": 540, "width": 320 }, "position": { - "x": 735.3215653605321, - "y": 423.7970360460631 + "x": 1552.5270288197573, + "y": 310.92605536703144 }, "selected": false, "type": "genericNode" - }, - { - "data": { - "id": "note-AJ1HC", - "node": { - "description": "# Hybrid Search RAG\n\nHybrid search performs a vector similarity search and a lexical search, compares the results of both searches, and then returns the most relevant results overall.\n\n## Prerequisites\n\n* An [OpenAI API key](https://platform.openai.com/)\n* An [Astra DB Application Token](https://docs.datastax.com/en/astra-db-serverless/databases/create-database.html) for the Astra DB component.\n\n## Quickstart\n\n1. In the Astra DB component, add your Astra DB Application Token.\nThis connects Langflow to your Astra database.\n2. Select an Astra collection that is hybrid-enabled.\nFor more information, see the [Datastax documentation](https://docs.datastax.com/en/astra-db-serverless/databases/hybrid-search.html).\nThe connection appears between the Parser component and the Astra DB component when a vector database is connected.\n3. Ensure the **Lexical Terms** and **Parsed Text** ports are connected.\n4. Add your OpenAI API key in the **Language Model** model component.\n5. Open the Playground and ask a question, like \"What are the features of my data?\"", - "display_name": "", - "documentation": "", - "template": { - "backgroundColor": "blue" - } - }, - "type": "note" - }, - "dragging": false, - "id": "note-AJ1HC", - "measured": { - "height": 601, - "width": 575 - }, - "position": { - "x": 816.3801044575429, - "y": -279.19595575780494 - }, - "selected": false, - "type": "noteNode" } ], "viewport": { - "x": 28.84866644052977, - "y": 276.30129659855504, - "zoom": 0.5265349644912217 + "x": -29.911832824936937, + "y": 88.77245200098008, + "zoom": 0.582863818810844 } }, "description": "Explore Hybrid Search with a vector database.", "endpoint_name": null, - "id": "cd58d400-fe60-47c6-b2b5-4a7d3eada7b1", + "id": "be9c7480-a8a2-4a12-ab32-67c1432e1504", "is_component": false, - "last_tested_version": "1.4.3", + "last_tested_version": "1.5.0.post2", "name": "Hybrid Search RAG", "tags": [ "openai", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 5a33dbd7933c..969918dc3871 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -7,7 +7,7 @@ "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-et7o5", + "id": "ChatInput-zueUA", "name": "message", "output_types": [ "Message" @@ -15,7 +15,7 @@ }, "targetHandle": { "fieldName": "question", - "id": "Prompt-V3tlJ", + "id": "Prompt-oDuVT", "inputTypes": [ "Message", "Text" @@ -23,12 +23,12 @@ "type": "str" } }, - "id": "reactflow__edge-ChatInput-et7o5{œdataTypeœ:œChatInputœ,œidœ:œChatInput-et7o5œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-V3tlJ{œfieldNameœ:œquestionœ,œidœ:œPrompt-V3tlJœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-ChatInput-zueUA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-zueUAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-oDuVT{œfieldNameœ:œquestionœ,œidœ:œPrompt-oDuVTœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, - "source": "ChatInput-et7o5", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-et7o5œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-V3tlJ", - "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-V3tlJœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" + "source": "ChatInput-zueUA", + "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-zueUAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "target": "Prompt-oDuVT", + "targetHandle": "{œfieldNameœ:œquestionœ,œidœ:œPrompt-oDuVTœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}" }, { "animated": false, @@ -36,7 +36,7 @@ "data": { "sourceHandle": { "dataType": "parser", - "id": "parser-WUXPk", + "id": "parser-CbWBG", "name": "parsed_text", "output_types": [ "Message" @@ -44,7 +44,7 @@ }, "targetHandle": { "fieldName": "context", - "id": "Prompt-V3tlJ", + "id": "Prompt-oDuVT", "inputTypes": [ "Message", "Text" @@ -52,125 +52,100 @@ "type": "str" } }, - "id": "reactflow__edge-parser-WUXPk{œdataTypeœ:œparserœ,œidœ:œparser-WUXPkœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-V3tlJ{œfieldNameœ:œcontextœ,œidœ:œPrompt-V3tlJœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-parser-CbWBG{œdataTypeœ:œparserœ,œidœ:œparser-CbWBGœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-oDuVT{œfieldNameœ:œcontextœ,œidœ:œPrompt-oDuVTœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, - "source": "parser-WUXPk", - "sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-WUXPkœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-V3tlJ", - "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-V3tlJœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" + "source": "parser-CbWBG", + "sourceHandle": "{œdataTypeœ:œparserœ,œidœ:œparser-CbWBGœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "target": "Prompt-oDuVT", + "targetHandle": "{œfieldNameœ:œcontextœ,œidœ:œPrompt-oDuVTœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-oFtHy", - "name": "embeddings", - "output_types": [ - "Embeddings" - ] - }, - "targetHandle": { - "fieldName": "embedding_model", - "id": "AstraDB-W6NB4", - "inputTypes": [ - "Embeddings" - ], - "type": "other" - } - }, - "id": "reactflow__edge-OpenAIEmbeddings-oFtHy{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-oFtHyœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-W6NB4{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-W6NB4œ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", - "selected": false, - "source": "OpenAIEmbeddings-oFtHy", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-oFtHyœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-W6NB4", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-W6NB4œ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-v0rcw", - "name": "embeddings", + "dataType": "File", + "id": "File-PvlCh", + "name": "message", "output_types": [ - "Embeddings" + "Message" ] }, "targetHandle": { - "fieldName": "embedding_model", - "id": "AstraDB-JsRrT", + "fieldName": "data_inputs", + "id": "SplitText-l3kJG", "inputTypes": [ - "Embeddings" + "Data", + "DataFrame", + "Message" ], "type": "other" } }, - "id": "reactflow__edge-OpenAIEmbeddings-v0rcw{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-v0rcwœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-JsRrT{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-JsRrTœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-File-PvlCh{œdataTypeœ:œFileœ,œidœ:œFile-PvlChœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-SplitText-l3kJG{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-l3kJGœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", "selected": false, - "source": "OpenAIEmbeddings-v0rcw", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-v0rcwœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-JsRrT", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-JsRrTœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" + "source": "File-PvlCh", + "sourceHandle": "{œdataTypeœ:œFileœ,œidœ:œFile-PvlChœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "target": "SplitText-l3kJG", + "targetHandle": "{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-l3kJGœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "ChatInput", - "id": "ChatInput-et7o5", - "name": "message", + "dataType": "Prompt", + "id": "Prompt-oDuVT", + "name": "prompt", "output_types": [ "Message" ] }, "targetHandle": { - "fieldName": "search_query", - "id": "AstraDB-JsRrT", + "fieldName": "input_value", + "id": "LanguageModelComponent-9vLb9", "inputTypes": [ "Message" ], - "type": "query" + "type": "str" } }, - "id": "reactflow__edge-ChatInput-et7o5{œdataTypeœ:œChatInputœ,œidœ:œChatInput-et7o5œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-JsRrT{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-JsRrTœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", + "id": "reactflow__edge-Prompt-oDuVT{œdataTypeœ:œPromptœ,œidœ:œPrompt-oDuVTœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-LanguageModelComponent-9vLb9{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-9vLb9œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "ChatInput-et7o5", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-et7o5œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "AstraDB-JsRrT", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-JsRrTœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}" + "source": "Prompt-oDuVT", + "sourceHandle": "{œdataTypeœ:œPromptœ,œidœ:œPrompt-oDuVTœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}", + "target": "LanguageModelComponent-9vLb9", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-9vLb9œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "AstraDB", - "id": "AstraDB-JsRrT", - "name": "dataframe", + "dataType": "LanguageModelComponent", + "id": "LanguageModelComponent-9vLb9", + "name": "text_output", "output_types": [ - "DataFrame" + "Message" ] }, "targetHandle": { - "fieldName": "input_data", - "id": "parser-WUXPk", + "fieldName": "input_value", + "id": "ChatOutput-NH6u1", "inputTypes": [ + "Data", "DataFrame", - "Data" + "Message" ], - "type": "other" + "type": "str" } }, - "id": "reactflow__edge-AstraDB-JsRrT{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-JsRrTœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-parser-WUXPk{œfieldNameœ:œinput_dataœ,œidœ:œparser-WUXPkœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-LanguageModelComponent-9vLb9{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-9vLb9œ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-NH6u1{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-NH6u1œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "AstraDB-JsRrT", - "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-JsRrTœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", - "target": "parser-WUXPk", - "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œparser-WUXPkœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" + "source": "LanguageModelComponent-9vLb9", + "sourceHandle": "{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-9vLb9œ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}", + "target": "ChatOutput-NH6u1", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-NH6u1œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}" }, { "animated": false, @@ -178,7 +153,7 @@ "data": { "sourceHandle": { "dataType": "SplitText", - "id": "SplitText-6H5cD", + "id": "SplitText-l3kJG", "name": "dataframe", "output_types": [ "DataFrame" @@ -186,7 +161,7 @@ }, "targetHandle": { "fieldName": "ingest_data", - "id": "AstraDB-W6NB4", + "id": "AstraDB-s5fQW", "inputTypes": [ "Data", "DataFrame" @@ -194,100 +169,65 @@ "type": "other" } }, - "id": "reactflow__edge-SplitText-6H5cD{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-6H5cDœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-AstraDB-W6NB4{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-W6NB4œ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", + "id": "xy-edge__SplitText-l3kJG{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-l3kJGœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-AstraDB-s5fQW{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-s5fQWœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", "selected": false, - "source": "SplitText-6H5cD", - "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-6H5cDœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", - "target": "AstraDB-W6NB4", - "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-W6NB4œ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" + "source": "SplitText-l3kJG", + "sourceHandle": "{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-l3kJGœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}", + "target": "AstraDB-s5fQW", + "targetHandle": "{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-s5fQWœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}" }, { - "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "File", - "id": "File-vusZ2", + "dataType": "ChatInput", + "id": "ChatInput-zueUA", "name": "message", "output_types": [ "Message" ] }, "targetHandle": { - "fieldName": "data_inputs", - "id": "SplitText-6H5cD", - "inputTypes": [ - "Data", - "DataFrame", - "Message" - ], - "type": "other" - } - }, - "id": "reactflow__edge-File-vusZ2{œdataTypeœ:œFileœ,œidœ:œFile-vusZ2œ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-SplitText-6H5cD{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-6H5cDœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", - "selected": false, - "source": "File-vusZ2", - "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-vusZ2œ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "SplitText-6H5cD", - "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-6H5cDœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "Prompt", - "id": "Prompt-V3tlJ", - "name": "prompt", - "output_types": [ - "Message" - ] - }, - "targetHandle": { - "fieldName": "input_value", - "id": "LanguageModelComponent-1uhUK", + "fieldName": "search_query", + "id": "AstraDB-WAq6g", "inputTypes": [ "Message" ], - "type": "str" + "type": "query" } }, - "id": "reactflow__edge-Prompt-V3tlJ{œdataTypeœ:œPromptœ,œidœ:œPrompt-V3tlJœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-LanguageModelComponent-1uhUK{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-1uhUKœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "selected": false, - "source": "Prompt-V3tlJ", - "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-V3tlJœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", - "target": "LanguageModelComponent-1uhUK", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œLanguageModelComponent-1uhUKœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "id": "xy-edge__ChatInput-zueUA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-zueUAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-WAq6g{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-WAq6gœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", + "source": "ChatInput-zueUA", + "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-zueUAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "target": "AstraDB-WAq6g", + "targetHandle": "{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-WAq6gœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}" }, { - "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "LanguageModelComponent", - "id": "LanguageModelComponent-1uhUK", - "name": "text_output", + "dataType": "AstraDB", + "id": "AstraDB-WAq6g", + "name": "search_results", "output_types": [ - "Message" + "Data" ] }, "targetHandle": { - "fieldName": "input_value", - "id": "ChatOutput-ZaYDW", + "fieldName": "input_data", + "id": "parser-CbWBG", "inputTypes": [ - "Data", "DataFrame", - "Message" + "Data" ], - "type": "str" + "type": "other" } }, - "id": "reactflow__edge-LanguageModelComponent-1uhUK{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-1uhUKœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-ZaYDW{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-ZaYDWœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}", - "selected": false, - "source": "LanguageModelComponent-1uhUK", - "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-1uhUKœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", - "target": "ChatOutput-ZaYDW", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-ZaYDWœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}" + "id": "xy-edge__AstraDB-WAq6g{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-WAq6gœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-parser-CbWBG{œfieldNameœ:œinput_dataœ,œidœ:œparser-CbWBGœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "source": "AstraDB-WAq6g", + "sourceHandle": "{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-WAq6gœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}", + "target": "parser-CbWBG", + "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œparser-CbWBGœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" } ], "nodes": [ @@ -295,7 +235,7 @@ "data": { "description": "Get chat inputs from the Playground.", "display_name": "Chat Input", - "id": "ChatInput-et7o5", + "id": "ChatInput-zueUA", "node": { "base_classes": [ "Message" @@ -574,7 +514,7 @@ }, "dragging": false, "height": 234, - "id": "ChatInput-et7o5", + "id": "ChatInput-zueUA", "measured": { "height": 234, "width": 320 @@ -595,7 +535,7 @@ "data": { "description": "Create a prompt template with dynamic variables.", "display_name": "Prompt", - "id": "Prompt-V3tlJ", + "id": "Prompt-oDuVT", "node": { "base_classes": [ "Message" @@ -759,7 +699,7 @@ }, "dragging": false, "height": 433, - "id": "Prompt-V3tlJ", + "id": "Prompt-oDuVT", "measured": { "height": 433, "width": 320 @@ -780,7 +720,7 @@ "data": { "description": "Split text into chunks based on specified criteria.", "display_name": "Split Text", - "id": "SplitText-6H5cD", + "id": "SplitText-l3kJG", "node": { "base_classes": [ "Data" @@ -982,7 +922,7 @@ }, "dragging": false, "height": 475, - "id": "SplitText-6H5cD", + "id": "SplitText-l3kJG", "measured": { "height": 475, "width": 320 @@ -1001,7 +941,7 @@ }, { "data": { - "id": "note-rR7dl", + "id": "note-XRjSv", "node": { "description": "## 🐕 2. Retriever Flow\n\nThis flow answers your questions with contextual data retrieved from your vector database.\n\nOpen the **Playground** and ask, \n\n```\nWhat is this document about?\n```\n", "display_name": "", @@ -1014,7 +954,7 @@ }, "dragging": false, "height": 324, - "id": "note-rR7dl", + "id": "note-XRjSv", "measured": { "height": 324, "width": 324 @@ -1038,7 +978,7 @@ }, { "data": { - "id": "note-zTYux", + "id": "note-Z0mzM", "node": { "description": "Retrieval Augmented Generation (RAG) is a way of providing additional context to a Large Language Model (LLM) by preloading a vector database with embeddings for relevant content. When a user chats with the LLM, a _similarity search_ retrieves relevant content by comparing an embedding for the user's query against the embeddings in the vector database.\nFor example, a RAG chatbot could be pre-loaded with product data, and then it can help customers find specific products based on their queries.\nThis template has two sub-flows. One flow loads data into your vector store, and the other is the user-driven chat flow that compares a new query against the existing content in your vector database.\n\n## Quickstart\n1. Add your OpenAI API key to the **Language Model** component and the two **Embeddings** components.\n2. Add an Astra application token to the **Astra DB** vector store components, or replace these components with other vector store components available in the **Components** menu.\n**💡 Store your credentials as Langflow global variables 🌐 to simplify token management and reuse in your flows.**\n\n## Run the flows\n1. Load your data into a vector database with the 📚 **Load Data** flow. Select a file to upload in the **File** component, and then click **Play** ▶️ on the **Astra DB** component to run the **Load Data** flow.\n2. Open the **Playground** to start a chat with the 🐕 **Retriever** flow.\n\nOnly the run the **Load Data** flow when you need to populate your vector database with baseline content, such as product data.\nThe **Retriever** flow is the user-facing chat flow. This flow generates an embedding from chat input, runs a similarity search against the vector database to retrieve relevant content, and then passes the original query and the retrieved content to the LLM, which produces the chat response sent to the user.\n\n## Next steps\nExperiment by changing the prompt and the loaded data to see how the LLM's responses change.", "display_name": "Read Me", @@ -1051,7 +991,7 @@ }, "dragging": false, "height": 556, - "id": "note-zTYux", + "id": "note-Z0mzM", "measured": { "height": 556, "width": 389 @@ -1077,7 +1017,7 @@ "data": { "description": "Display a chat message in the Playground.", "display_name": "Chat Output", - "id": "ChatOutput-ZaYDW", + "id": "ChatOutput-NH6u1", "node": { "base_classes": [ "Message" @@ -1380,7 +1320,7 @@ }, "dragging": false, "height": 234, - "id": "ChatOutput-ZaYDW", + "id": "ChatOutput-NH6u1", "measured": { "height": 234, "width": 320 @@ -1399,7 +1339,7 @@ }, { "data": { - "id": "OpenAIEmbeddings-v0rcw", + "id": "OpenAIEmbeddings-3lfxG", "node": { "base_classes": [ "Embeddings" @@ -1892,7 +1832,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-v0rcw", + "id": "OpenAIEmbeddings-3lfxG", "measured": { "height": 320, "width": 320 @@ -1911,7 +1851,7 @@ }, { "data": { - "id": "note-i07aq", + "id": "note-nD1ad", "node": { "description": "## 📚 1. Load Data Flow\n\nRun this first! Load data from a local file and embed it into the vector database.\n\nSelect a Database and a Collection, or create new ones. \n\nClick **Run component** on the **Astra DB** component to load your data.\n\n\n### Next steps:\n Experiment by changing the prompt and the contextual data to see how the retrieval flow's responses change.", "display_name": "", @@ -1924,7 +1864,7 @@ }, "dragging": false, "height": 460, - "id": "note-i07aq", + "id": "note-nD1ad", "measured": { "height": 460, "width": 340 @@ -1948,7 +1888,7 @@ }, { "data": { - "id": "OpenAIEmbeddings-oFtHy", + "id": "OpenAIEmbeddings-3XP2J", "node": { "base_classes": [ "Embeddings" @@ -2441,7 +2381,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-oFtHy", + "id": "OpenAIEmbeddings-3XP2J", "measured": { "height": 320, "width": 320 @@ -2460,7 +2400,7 @@ }, { "data": { - "id": "note-YhXtV", + "id": "note-Yb8sb", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2473,7 +2413,7 @@ }, "dragging": false, "height": 324, - "id": "note-YhXtV", + "id": "note-Yb8sb", "measured": { "height": 324, "width": 324 @@ -2492,7 +2432,7 @@ }, { "data": { - "id": "note-AdydJ", + "id": "note-jdw5F", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2505,7 +2445,7 @@ }, "dragging": false, "height": 324, - "id": "note-AdydJ", + "id": "note-jdw5F", "measured": { "height": 324, "width": 324 @@ -2524,7 +2464,7 @@ }, { "data": { - "id": "note-aBKhj", + "id": "note-Fps8P", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2537,7 +2477,7 @@ }, "dragging": false, "height": 324, - "id": "note-aBKhj", + "id": "note-Fps8P", "measured": { "height": 324, "width": 324 @@ -2556,7 +2496,7 @@ }, { "data": { - "id": "parser-WUXPk", + "id": "parser-CbWBG", "node": { "base_classes": [ "Message" @@ -2718,7 +2658,7 @@ "type": "parser" }, "dragging": false, - "id": "parser-WUXPk", + "id": "parser-CbWBG", "measured": { "height": 361, "width": 320 @@ -2732,189 +2672,141 @@ }, { "data": { - "id": "AstraDB-JsRrT", + "id": "File-PvlCh", "node": { "base_classes": [ - "Data", - "DataFrame", - "VectorStore" + "Message" ], "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Ingest and search documents in Astra DB", - "display_name": "Astra DB", - "documentation": "https://docs.datastax.com/en/langflow/astra-components.html", + "description": "Loads content from one or more files as a DataFrame.", + "display_name": "File", + "documentation": "", "edited": false, "field_order": [ - "token", - "environment", - "database_name", - "api_endpoint", - "keyspace", - "collection_name", - "embedding_model", - "ingest_data", - "search_query", - "should_cache_vector_store", - "search_method", - "reranker", - "lexical_terms", - "number_of_results", - "search_type", - "search_score_threshold", - "advanced_search_filter", - "autodetect_collection", - "content_field", - "deletion_field", - "ignore_invalid_documents", - "astradb_vectorstore_kwargs" + "path", + "file_path", + "separator", + "silent_errors", + "delete_server_file_after_processing", + "ignore_unsupported_extensions", + "ignore_unspecified_files", + "use_multithreading", + "concurrency_multithreading" ], "frozen": false, - "icon": "AstraDB", + "icon": "file-text", + "last_updated": "2025-08-26T16:30:56.653Z", "legacy": false, - "metadata": { - "code_hash": "23fbe9daca09", - "dependencies": { - "dependencies": [ - { - "name": "astrapy", - "version": "2.0.1" - }, - { - "name": "langchain_astradb", - "version": "0.6.0" - }, - { - "name": "langchain_core", - "version": "0.3.72" - }, - { - "name": "langflow", - "version": null - } - ], - "total_dependencies": 4 - }, - "module": "langflow.components.datastax.astradb.AstraDBVectorStoreComponent" - }, + "metadata": {}, "minimized": false, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Search Results", - "group_outputs": false, - "method": "search_documents", - "name": "search_results", - "selected": "Data", - "tool_mode": true, - "types": [ - "Data" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "DataFrame", - "group_outputs": false, - "method": "as_dataframe", - "name": "dataframe", - "selected": "DataFrame", - "tool_mode": true, - "types": [ - "DataFrame" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "Vector Store Connection", + "display_name": "Raw Content", "group_outputs": false, - "hidden": true, - "method": "as_vector_store", - "name": "vectorstoreconnection", - "selected": "VectorStore", + "method": "load_files_message", + "name": "message", + "options": null, + "required_inputs": null, + "selected": "Message", "tool_mode": true, "types": [ - "VectorStore" + "Message" ], "value": "__UNDEFINED__" } ], "pinned": false, - "selected_output": "dataframe", "template": { "_type": "Component", - "advanced_search_filter": { - "_input_type": "NestedDictInput", + "code": { "advanced": true, - "display_name": "Search Metadata Filter", - "dynamic": false, - "info": "Optional dictionary of filters to apply to the search query.", + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", "list": false, - "list_add_label": "Add More", - "name": "advanced_search_filter", + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "NestedDict", - "value": {} + "type": "code", + "value": "\"\"\"Enhanced file component v2 with mypy and ruff compliance.\"\"\"\n\nfrom __future__ import annotations\n\nfrom copy import deepcopy\nfrom enum import Enum\nfrom typing import TYPE_CHECKING, Any\n\nfrom langflow.base.data.base_file import BaseFileComponent\nfrom langflow.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n FileInput,\n IntInput,\n MessageTextInput,\n Output,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.message import Message\n\nif TYPE_CHECKING:\n from langflow.schema import DataFrame\n\n\nclass MockConversionStatus(Enum):\n \"\"\"Mock ConversionStatus for fallback compatibility.\"\"\"\n\n SUCCESS = \"success\"\n FAILURE = \"failure\"\n\n\nclass MockInputFormat(Enum):\n \"\"\"Mock InputFormat for fallback compatibility.\"\"\"\n\n PDF = \"pdf\"\n IMAGE = \"image\"\n\n\nclass MockImageRefMode(Enum):\n \"\"\"Mock ImageRefMode for fallback compatibility.\"\"\"\n\n PLACEHOLDER = \"placeholder\"\n EMBEDDED = \"embedded\"\n\n\nclass DoclingImports:\n \"\"\"Container for docling imports with type information.\"\"\"\n\n def __init__(\n self,\n conversion_status: type[Enum],\n input_format: type[Enum],\n document_converter: type,\n image_ref_mode: type[Enum],\n strategy: str,\n ) -> None:\n self.conversion_status = conversion_status\n self.input_format = input_format\n self.document_converter = document_converter\n self.image_ref_mode = image_ref_mode\n self.strategy = strategy\n\n\nclass FileComponent(BaseFileComponent):\n \"\"\"Enhanced file component v2 that combines standard file loading with optional Docling processing and export.\n\n This component supports all features of the standard File component, plus an advanced mode\n that enables Docling document processing and export to various formats (Markdown, HTML, etc.).\n \"\"\"\n\n display_name = \"File\"\n description = \"Loads content from files with optional advanced document processing and export using Docling.\"\n documentation: str = \"https://docs.langflow.org/components-data#file\"\n icon = \"file-text\"\n name = \"File\"\n\n # Docling supported formats from original component\n VALID_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"csv\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"docx\",\n \"htm\",\n \"html\",\n \"jpeg\",\n \"json\",\n \"md\",\n \"pdf\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"txt\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"xml\",\n \"webp\",\n *TEXT_FILE_TYPES,\n ]\n\n # Fixed export settings\n EXPORT_FORMAT = \"Markdown\"\n IMAGE_MODE = \"placeholder\"\n\n _base_inputs = deepcopy(BaseFileComponent._base_inputs)\n\n for input_item in _base_inputs:\n if isinstance(input_item, FileInput) and input_item.name == \"path\":\n input_item.real_time_refresh = True\n break\n\n inputs = [\n *_base_inputs,\n BoolInput(\n name=\"advanced_mode\",\n display_name=\"Advanced Parser\",\n value=False,\n real_time_refresh=True,\n info=(\n \"Enable advanced document processing and export with Docling for PDFs, images, and office documents. \"\n \"Available only for single file processing.\"\n ),\n show=False,\n ),\n DropdownInput(\n name=\"pipeline\",\n display_name=\"Pipeline\",\n info=\"Docling pipeline to use\",\n options=[\"standard\", \"vlm\"],\n value=\"standard\",\n advanced=True,\n ),\n DropdownInput(\n name=\"ocr_engine\",\n display_name=\"OCR Engine\",\n info=\"OCR engine to use. Only available when pipeline is set to 'standard'.\",\n options=[\"\", \"easyocr\"],\n value=\"\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder between pages in the markdown output.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n show=False,\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"[Deprecated] Use Multithreading\",\n advanced=True,\n value=True,\n info=\"Set 'Processing Concurrency' greater than 1 to enable multithreading.\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"When multiple files are being processed, the number of files to process concurrently.\",\n value=1,\n ),\n BoolInput(\n name=\"markdown\",\n display_name=\"Markdown Export\",\n info=\"Export processed documents to Markdown format. Only available when advanced mode is enabled.\",\n value=False,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n ]\n\n def _path_value(self, template) -> list[str]:\n # Get current path value\n return template.get(\"path\", {}).get(\"file_path\", [])\n\n def update_build_config(\n self,\n build_config: dict[str, Any],\n field_value: Any,\n field_name: str | None = None,\n ) -> dict[str, Any]:\n \"\"\"Update build configuration to show/hide fields based on file count and advanced_mode.\"\"\"\n if field_name == \"path\":\n # Get current path value\n path_value = self._path_value(build_config)\n file_path = path_value[0] if len(path_value) > 0 else \"\"\n\n # Show/hide Advanced Parser based on file count (only for single files)\n file_count = len(field_value) if field_value else 0\n if file_count == 1 and not file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n build_config[\"advanced_mode\"][\"show\"] = True\n else:\n build_config[\"advanced_mode\"][\"show\"] = False\n build_config[\"advanced_mode\"][\"value\"] = False # Reset to False when hidden\n\n # Hide all advanced fields when Advanced Parser is not available\n advanced_fields = [\n \"pipeline\",\n \"ocr_engine\",\n \"doc_key\",\n \"md_image_placeholder\",\n \"md_page_break_placeholder\",\n ]\n for field in advanced_fields:\n if field in build_config:\n build_config[field][\"show\"] = False\n\n elif field_name == \"advanced_mode\":\n # Show/hide advanced fields based on advanced_mode (only if single file)\n advanced_fields = [\n \"pipeline\",\n \"ocr_engine\",\n \"doc_key\",\n \"md_image_placeholder\",\n \"md_page_break_placeholder\",\n ]\n\n for field in advanced_fields:\n if field in build_config:\n build_config[field][\"show\"] = field_value\n\n return build_config\n\n def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]: # noqa: ARG002\n \"\"\"Dynamically show outputs based on the number of files and their types.\"\"\"\n if field_name not in [\"path\", \"advanced_mode\"]:\n return frontend_node\n\n # Add outputs based on the number of files in the path\n template = frontend_node.get(\"template\", {})\n path_value = self._path_value(template)\n if len(path_value) == 0:\n return frontend_node\n\n # Clear existing outputs\n frontend_node[\"outputs\"] = []\n\n if len(path_value) == 1:\n # We need to check if the file is structured content\n file_path = path_value[0] if field_name == \"path\" else frontend_node[\"template\"][\"path\"][\"file_path\"][0]\n if file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"dataframe\", method=\"load_files_structured\"),\n )\n elif file_path.endswith(\".json\"):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"json\", method=\"load_files_json\"),\n )\n\n # Add outputs based on advanced mode\n advanced_mode = frontend_node.get(\"template\", {}).get(\"advanced_mode\", {}).get(\"value\", False)\n\n if advanced_mode:\n # Advanced mode: Structured Output, Markdown, and File Path\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Output\", name=\"advanced\", method=\"load_files_advanced\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Markdown\", name=\"markdown\", method=\"load_files_markdown\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n # Normal mode: Raw Content and File Path\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n # For multiple files, we show the files output (DataFrame format)\n # Advanced Parser is not available for multiple files\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Files\", name=\"dataframe\", method=\"load_files\"),\n )\n\n return frontend_node\n\n def _try_import_docling(self) -> DoclingImports | None:\n \"\"\"Try different import strategies for docling components.\"\"\"\n # Try strategy 1: Latest docling structure\n try:\n from docling.datamodel.base_models import ConversionStatus, InputFormat # type: ignore[import-untyped]\n from docling.document_converter import DocumentConverter # type: ignore[import-untyped]\n from docling_core.types.doc import ImageRefMode # type: ignore[import-untyped]\n\n self.log(\"Using latest docling import structure\")\n return DoclingImports(\n conversion_status=ConversionStatus,\n input_format=InputFormat,\n document_converter=DocumentConverter,\n image_ref_mode=ImageRefMode,\n strategy=\"latest\",\n )\n except ImportError as e:\n self.log(f\"Latest docling structure failed: {e}\")\n\n # Try strategy 2: Alternative import paths\n try:\n from docling.document_converter import DocumentConverter # type: ignore[import-untyped]\n from docling_core.types.doc import ImageRefMode # type: ignore[import-untyped]\n\n # Try to get ConversionStatus from different locations\n conversion_status: type[Enum] = MockConversionStatus\n input_format: type[Enum] = MockInputFormat\n\n try:\n from docling_core.types import ConversionStatus, InputFormat # type: ignore[import-untyped]\n\n conversion_status = ConversionStatus\n input_format = InputFormat\n except ImportError:\n try:\n from docling.datamodel import ConversionStatus, InputFormat # type: ignore[import-untyped]\n\n conversion_status = ConversionStatus\n input_format = InputFormat\n except ImportError:\n # Use mock enums if we can't find them\n pass\n\n self.log(\"Using alternative docling import structure\")\n return DoclingImports(\n conversion_status=conversion_status,\n input_format=input_format,\n document_converter=DocumentConverter,\n image_ref_mode=ImageRefMode,\n strategy=\"alternative\",\n )\n except ImportError as e:\n self.log(f\"Alternative docling structure failed: {e}\")\n\n # Try strategy 3: Basic converter only\n try:\n from docling.document_converter import DocumentConverter # type: ignore[import-untyped]\n\n self.log(\"Using basic docling import structure with mocks\")\n return DoclingImports(\n conversion_status=MockConversionStatus,\n input_format=MockInputFormat,\n document_converter=DocumentConverter,\n image_ref_mode=MockImageRefMode,\n strategy=\"basic\",\n )\n except ImportError as e:\n self.log(f\"Basic docling structure failed: {e}\")\n\n # Strategy 4: Complete fallback - return None to indicate failure\n return None\n\n def _create_advanced_converter(self, docling_imports: DoclingImports) -> Any:\n \"\"\"Create advanced converter with pipeline options if available.\"\"\"\n try:\n from docling.datamodel.pipeline_options import PdfPipelineOptions # type: ignore[import-untyped]\n from docling.document_converter import PdfFormatOption # type: ignore[import-untyped]\n\n document_converter = docling_imports.document_converter\n input_format = docling_imports.input_format\n\n # Create basic pipeline options\n pipeline_options = PdfPipelineOptions()\n\n # Configure OCR if specified and available\n if self.ocr_engine:\n try:\n from docling.models.factories import get_ocr_factory # type: ignore[import-untyped]\n\n pipeline_options.do_ocr = True\n ocr_factory = get_ocr_factory(allow_external_plugins=False)\n ocr_options = ocr_factory.create_options(kind=self.ocr_engine)\n pipeline_options.ocr_options = ocr_options\n self.log(f\"Configured OCR with engine: {self.ocr_engine}\")\n except Exception as e: # noqa: BLE001\n self.log(f\"Could not configure OCR: {e}, proceeding without OCR\")\n pipeline_options.do_ocr = False\n\n # Create format options\n pdf_format_option = PdfFormatOption(pipeline_options=pipeline_options)\n format_options = {}\n if hasattr(input_format, \"PDF\"):\n format_options[input_format.PDF] = pdf_format_option\n if hasattr(input_format, \"IMAGE\"):\n format_options[input_format.IMAGE] = pdf_format_option\n\n return document_converter(format_options=format_options)\n\n except Exception as e: # noqa: BLE001\n self.log(f\"Could not create advanced converter: {e}, using basic converter\")\n return docling_imports.document_converter()\n\n def _is_docling_compatible(self, file_path: str) -> bool:\n \"\"\"Check if file is compatible with Docling processing.\"\"\"\n # All VALID_EXTENSIONS are Docling compatible (except for TEXT_FILE_TYPES which may overlap)\n docling_extensions = [\n \".adoc\",\n \".asciidoc\",\n \".asc\",\n \".bmp\",\n \".csv\",\n \".dotx\",\n \".dotm\",\n \".docm\",\n \".docx\",\n \".htm\",\n \".html\",\n \".jpeg\",\n \".json\",\n \".md\",\n \".pdf\",\n \".png\",\n \".potx\",\n \".ppsx\",\n \".pptm\",\n \".potm\",\n \".ppsm\",\n \".pptx\",\n \".tiff\",\n \".txt\",\n \".xls\",\n \".xlsx\",\n \".xhtml\",\n \".xml\",\n \".webp\",\n ]\n return any(file_path.lower().endswith(ext) for ext in docling_extensions)\n\n def process_files(\n self,\n file_list: list[BaseFileComponent.BaseFile],\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process files using standard parsing or Docling based on advanced_mode and file type.\"\"\"\n\n def process_file_standard(file_path: str, *, silent_errors: bool = False) -> Data | None:\n \"\"\"Process a single file using standard text parsing.\"\"\"\n try:\n return parse_text_file_to_data(file_path, silent_errors=silent_errors)\n except FileNotFoundError as e:\n msg = f\"File not found: {file_path}. Error: {e}\"\n self.log(msg)\n if not silent_errors:\n raise\n return None\n except Exception as e:\n msg = f\"Unexpected error processing {file_path}: {e}\"\n self.log(msg)\n if not silent_errors:\n raise\n return None\n\n def process_file_docling(file_path: str, *, silent_errors: bool = False) -> Data | None:\n \"\"\"Process a single file using Docling if compatible, otherwise standard processing.\"\"\"\n # Try Docling first if file is compatible and advanced mode is enabled\n try:\n return self._process_with_docling_and_export(file_path)\n except Exception as e: # noqa: BLE001\n self.log(f\"Docling processing failed for {file_path}: {e}, falling back to standard processing\")\n if not silent_errors:\n # Return error data instead of raising\n return Data(data={\"error\": f\"Docling processing failed: {e}\", \"file_path\": file_path})\n\n return None\n\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n file_path = str(file_list[0].path)\n if self.advanced_mode and self._is_docling_compatible(file_path):\n processed_data = process_file_docling(file_path)\n if not processed_data:\n msg = f\"Failed to process file with Docling: {file_path}\"\n raise ValueError(msg)\n\n # Serialize processed data to match Data structure\n serialized_data = processed_data.serialize_model()\n\n # Now, if doc is nested, we need to unravel it\n clean_data: list[Data | None] = [processed_data]\n\n # This is where we've manually processed the data\n try:\n if \"exported_content\" not in serialized_data:\n clean_data = [\n Data(\n data={\n \"file_path\": file_path,\n **(\n item[\"element\"]\n if \"element\" in item\n else {k: v for k, v in item.items() if k != \"file_path\"}\n ),\n }\n )\n for item in serialized_data[\"doc\"]\n ]\n except Exception as _: # noqa: BLE001\n raise ValueError(serialized_data) from None\n\n # Repeat file_list to match the number of processed data elements\n final_data: list[Data | None] = clean_data\n return self.rollup_data(file_list, final_data)\n\n concurrency = 1 if not self.use_multithreading else max(1, self.concurrency_multithreading)\n file_count = len(file_list)\n\n self.log(f\"Starting parallel processing of {file_count} files with concurrency: {concurrency}.\")\n file_paths = [str(file.path) for file in file_list]\n my_data = parallel_load_data(\n file_paths,\n silent_errors=self.silent_errors,\n load_function=process_file_standard,\n max_concurrency=concurrency,\n )\n\n return self.rollup_data(file_list, my_data)\n\n def load_files_advanced(self) -> DataFrame:\n \"\"\"Load files using advanced Docling processing and export to an advanced format.\"\"\"\n # TODO: Update\n self.markdown = False\n return self.load_files()\n\n def load_files_markdown(self) -> Message:\n \"\"\"Load files using advanced Docling processing and export to Markdown format.\"\"\"\n self.markdown = True\n result = self.load_files()\n return Message(text=str(result.text[0]))\n\n def _process_with_docling_and_export(self, file_path: str) -> Data:\n \"\"\"Process a single file with Docling and export to the specified format.\"\"\"\n # Import docling components only when needed\n docling_imports = self._try_import_docling()\n\n if docling_imports is None:\n msg = \"Docling not available for advanced processing\"\n raise ImportError(msg)\n\n conversion_status = docling_imports.conversion_status\n document_converter = docling_imports.document_converter\n image_ref_mode = docling_imports.image_ref_mode\n\n try:\n # Create converter based on strategy and pipeline setting\n if docling_imports.strategy == \"latest\" and self.pipeline == \"standard\":\n converter = self._create_advanced_converter(docling_imports)\n else:\n # Use basic converter for compatibility\n converter = document_converter()\n self.log(\"Using basic DocumentConverter for Docling processing\")\n\n # Process single file\n result = converter.convert(file_path)\n\n # Check if conversion was successful\n success = False\n if hasattr(result, \"status\"):\n if hasattr(conversion_status, \"SUCCESS\"):\n success = result.status == conversion_status.SUCCESS\n else:\n success = str(result.status).lower() == \"success\"\n elif hasattr(result, \"document\"):\n # If no status but has document, assume success\n success = result.document is not None\n\n if not success:\n return Data(data={\"error\": \"Docling conversion failed\", \"file_path\": file_path})\n\n if self.markdown:\n self.log(\"Exporting document to Markdown format\")\n # Export the document to the specified format\n exported_content = self._export_document(result.document, image_ref_mode)\n\n return Data(\n text=exported_content,\n data={\n \"exported_content\": exported_content,\n \"export_format\": self.EXPORT_FORMAT,\n \"file_path\": file_path,\n },\n )\n\n return Data(\n data={\n \"doc\": self.docling_to_dataframe_simple(result.document.export_to_dict()),\n \"export_format\": self.EXPORT_FORMAT,\n \"file_path\": file_path,\n }\n )\n\n except Exception as e: # noqa: BLE001\n return Data(data={\"error\": f\"Docling processing error: {e!s}\", \"file_path\": file_path})\n\n def docling_to_dataframe_simple(self, doc):\n \"\"\"Extract all text elements into a simple DataFrame.\"\"\"\n return [\n {\n \"page_no\": text[\"prov\"][0][\"page_no\"] if text[\"prov\"] else None,\n \"label\": text[\"label\"],\n \"text\": text[\"text\"],\n \"level\": text.get(\"level\", None), # for headers\n }\n for text in doc[\"texts\"]\n ]\n\n def _export_document(self, document: Any, image_ref_mode: type[Enum]) -> str:\n \"\"\"Export document to Markdown format with placeholder images.\"\"\"\n try:\n image_mode = (\n image_ref_mode(self.IMAGE_MODE) if hasattr(image_ref_mode, self.IMAGE_MODE) else self.IMAGE_MODE\n )\n\n # Always export to Markdown since it's fixed\n return document.export_to_markdown(\n image_mode=image_mode,\n image_placeholder=self.md_image_placeholder,\n page_break_placeholder=self.md_page_break_placeholder,\n )\n\n except Exception as e: # noqa: BLE001\n self.log(f\"Markdown export failed: {e}, using basic text export\")\n # Fallback to basic text export\n try:\n return document.export_to_text()\n except Exception: # noqa: BLE001\n return str(document)\n" }, - "api_endpoint": { - "_input_type": "StrInput", - "advanced": false, - "display_name": "Astra DB API Endpoint", + "concurrency_multithreading": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Processing Concurrency", "dynamic": false, - "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.", + "info": "When multiple files are being processed, the number of files to process concurrently.", "list": false, "list_add_label": "Add More", - "load_from_db": false, - "name": "api_endpoint", + "name": "concurrency_multithreading", "placeholder": "", "required": false, - "show": false, + "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "" + "type": "int", + "value": 1 }, - "astradb_vectorstore_kwargs": { - "_input_type": "NestedDictInput", + "delete_server_file_after_processing": { + "_input_type": "BoolInput", "advanced": true, - "display_name": "AstraDBVectorStore Parameters", + "display_name": "Delete Server File After Processing", "dynamic": false, - "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.", + "info": "If true, the Server File Path will be deleted after processing.", "list": false, "list_add_label": "Add More", - "name": "astradb_vectorstore_kwargs", + "name": "delete_server_file_after_processing", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "NestedDict", - "value": {} + "type": "bool", + "value": true }, - "autodetect_collection": { + "file_path": { + "_input_type": "HandleInput", + "advanced": true, + "display_name": "Server File Path", + "dynamic": false, + "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.", + "input_types": [ + "Data", + "Message" + ], + "list": true, + "list_add_label": "Add More", + "name": "file_path", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "ignore_unspecified_files": { "_input_type": "BoolInput", "advanced": true, - "display_name": "Autodetect Collection", + "display_name": "Ignore Unspecified Files", "dynamic": false, - "info": "Boolean flag to determine whether to autodetect the collection.", + "info": "If true, Data with no 'file_path' property will be ignored.", "list": false, "list_add_label": "Add More", - "name": "autodetect_collection", + "name": "ignore_unspecified_files", "placeholder": "", "required": false, "show": true, @@ -2922,156 +2814,81 @@ "tool_mode": false, "trace_as_metadata": true, "type": "bool", - "value": true + "value": false }, - "code": { + "ignore_unsupported_extensions": { + "_input_type": "BoolInput", "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", + "display_name": "Ignore Unsupported Extensions", + "dynamic": false, + "info": "If true, files with unsupported extensions will not be processed.", "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, + "list_add_label": "Add More", + "name": "ignore_unsupported_extensions", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, - "type": "code", - "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true }, - "collection_name": { - "_input_type": "DropdownInput", + "path": { + "_input_type": "FileInput", "advanced": false, - "combobox": true, - "dialog_inputs": { - "fields": { - "data": { - "node": { - "description": "Please allow several seconds for creation to complete.", - "display_name": "Create new collection", - "field_order": [ - "01_new_collection_name", - "02_embedding_generation_provider", - "03_embedding_generation_model", - "04_dimension" - ], - "name": "create_collection", - "template": { - "01_new_collection_name": { - "_input_type": "StrInput", - "advanced": false, - "display_name": "Name", - "dynamic": false, - "info": "Name of the new collection to create in Astra DB.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "new_collection_name", - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "02_embedding_generation_provider": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Embedding generation method", - "dynamic": false, - "helper_text": "To create collections with more embedding provider options, go to your database in Astra DB", - "info": "Provider to use for generating embeddings.", - "name": "embedding_generation_provider", - "options": [], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "03_embedding_generation_model": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Embedding model", - "dynamic": false, - "info": "Model to use for generating embeddings.", - "name": "embedding_generation_model", - "options": [], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "04_dimension": { - "_input_type": "IntInput", - "advanced": false, - "display_name": "Dimensions", - "dynamic": false, - "info": "Dimensions of the embeddings to generate.", - "list": false, - "list_add_label": "Add More", - "name": "dimension", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int" - } - } - } - } - }, - "functionality": "create" - }, - "display_name": "Collection", + "display_name": "Files", "dynamic": false, - "info": "The name of the collection within Astra DB where the vectors will be stored.", - "name": "collection_name", - "options": [], - "options_metadata": [], + "fileTypes": [ + "txt", + "md", + "mdx", + "csv", + "json", + "yaml", + "yml", + "xml", + "html", + "htm", + "pdf", + "docx", + "py", + "sh", + "sql", + "js", + "ts", + "tsx", + "zip", + "tar", + "tgz", + "bz2", + "gz" + ], + "file_path": [], + "info": "Supported file extensions: txt, md, mdx, csv, json, yaml, yml, xml, html, htm, pdf, docx, py, sh, sql, js, ts, tsx; optionally bundled in file extensions: zip, tar, tgz, bz2, gz", + "list": true, + "list_add_label": "Add More", + "name": "path", "placeholder": "", "real_time_refresh": true, - "refresh_button": true, - "required": true, - "show": false, + "required": false, + "show": true, + "temp_file": false, "title_case": false, - "tool_mode": false, "trace_as_metadata": true, - "type": "str", + "type": "file", "value": "" }, - "content_field": { + "separator": { "_input_type": "StrInput", "advanced": true, - "display_name": "Content Field", + "display_name": "Separator", "dynamic": false, - "info": "Field to use as the text content field for the vector store.", + "info": "Specify the separator to use between multiple outputs in Message format.", "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "content_field", + "name": "separator", "placeholder": "", "required": false, "show": true, @@ -3079,196 +2896,35 @@ "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "" + "value": "\n\n" }, - "database_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": true, - "dialog_inputs": { - "fields": { - "data": { - "node": { - "description": "Please allow several minutes for creation to complete.", - "display_name": "Create new database", - "field_order": [ - "01_new_database_name", - "02_cloud_provider", - "03_region" - ], - "name": "create_database", - "template": { - "01_new_database_name": { - "_input_type": "StrInput", - "advanced": false, - "display_name": "Name", - "dynamic": false, - "info": "Name of the new database to create in Astra DB.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "new_database_name", - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "02_cloud_provider": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Cloud provider", - "dynamic": false, - "info": "Cloud provider for the new database.", - "name": "cloud_provider", - "options": [ - "Amazon Web Services", - "Google Cloud Platform", - "Microsoft Azure" - ], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "03_region": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Region", - "dynamic": false, - "info": "Region for the new database.", - "name": "region", - "options": [], - "options_metadata": [], - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - } - } - } - } - }, - "functionality": "create" - }, - "display_name": "Database", - "dynamic": false, - "info": "The Database name for the Astra DB instance.", - "name": "database_name", - "options": [], - "options_metadata": [ - { - "api_endpoint": "https://5b8bb22c-4a38-4f0a-865c-a18ed7590bd1-us-east-2.apps.astra.datastax.com", - "collections": 5, - "keyspaces": [ - "default_keyspace", - "samples_dataflow" - ], - "org_id": "260f986d-e65c-4f05-94a3-7cebfcb867a3", - "status": null - } - ], - "placeholder": "", - "real_time_refresh": true, - "refresh_button": true, - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "deletion_field": { - "_input_type": "StrInput", + "silent_errors": { + "_input_type": "BoolInput", "advanced": true, - "display_name": "Deletion Based On Field", - "dynamic": false, - "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "deletion_field", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "embedding_model": { - "_input_type": "HandleInput", - "advanced": false, - "display_name": "Embedding Model", + "display_name": "Silent Errors", "dynamic": false, - "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.", - "input_types": [ - "Embeddings" - ], + "info": "If true, errors will not raise an exception.", "list": false, "list_add_label": "Add More", - "name": "embedding_model", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "other", - "value": "" - }, - "environment": { - "_input_type": "DropdownInput", - "advanced": true, - "combobox": true, - "dialog_inputs": {}, - "display_name": "Environment", - "dynamic": false, - "info": "The environment for the Astra DB API Endpoint.", - "name": "environment", - "options": [ - "prod", - "test", - "dev" - ], - "options_metadata": [], + "name": "silent_errors", "placeholder": "", - "real_time_refresh": true, "required": false, "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "prod" + "type": "bool", + "value": false }, - "ignore_invalid_documents": { + "use_multithreading": { "_input_type": "BoolInput", "advanced": true, - "display_name": "Ignore Invalid Documents", + "display_name": "[Deprecated] Use Multithreading", "dynamic": false, - "info": "Boolean flag to determine whether to ignore invalid documents at runtime.", + "info": "Set 'Processing Concurrency' greater than 1 to enable multithreading.", "list": false, "list_add_label": "Add More", - "name": "ignore_invalid_documents", + "name": "use_multithreading", "placeholder": "", "required": false, "show": true, @@ -3276,260 +2932,324 @@ "tool_mode": false, "trace_as_metadata": true, "type": "bool", - "value": false + "value": true + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "File" + }, + "dragging": false, + "id": "File-PvlCh", + "measured": { + "height": 230, + "width": 320 + }, + "position": { + "x": 1330.7650978046952, + "y": 1431.5905495627503 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "LanguageModelComponent-9vLb9", + "node": { + "base_classes": [ + "LanguageModel", + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Runs a language model given a specified provider. ", + "display_name": "Language Model", + "documentation": "", + "edited": false, + "field_order": [ + "provider", + "model_name", + "api_key", + "input_value", + "system_message", + "stream", + "temperature" + ], + "frozen": false, + "icon": "brain-circuit", + "last_updated": "2025-08-26T16:30:56.494Z", + "legacy": false, + "metadata": { + "keywords": [ + "model", + "llm", + "language model", + "large language model" + ] + }, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Model Response", + "group_outputs": false, + "method": "text_response", + "name": "text_output", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" }, - "ingest_data": { - "_input_type": "HandleInput", + { + "allows_loop": false, + "cache": true, + "display_name": "Language Model", + "group_outputs": false, + "method": "build_model", + "name": "model_output", + "options": null, + "required_inputs": null, + "selected": "LanguageModel", + "tool_mode": true, + "types": [ + "LanguageModel" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "priority": 0, + "template": { + "_type": "Component", + "api_key": { + "_input_type": "SecretStrInput", "advanced": false, - "display_name": "Ingest Data", + "display_name": "OpenAI API Key", "dynamic": false, - "info": "", - "input_types": [ - "Data", - "DataFrame" - ], - "list": true, - "list_add_label": "Add More", - "name": "ingest_data", + "info": "Model Provider API key", + "input_types": [], + "load_from_db": true, + "name": "api_key", + "password": true, "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, - "trace_as_metadata": true, - "type": "other", - "value": "" + "type": "str", + "value": "OPENAI_API_KEY" }, - "keyspace": { - "_input_type": "DropdownInput", + "code": { "advanced": true, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Keyspace", - "dynamic": false, - "info": "Optional keyspace within Astra DB to use for the collection.", - "name": "keyspace", - "options": [], - "options_metadata": [], + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, "placeholder": "", - "real_time_refresh": true, - "required": false, + "required": true, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" + "type": "code", + "value": "from typing import Any\n\nfrom langchain_anthropic import ChatAnthropic\nfrom langchain_google_genai import ChatGoogleGenerativeAI\nfrom langchain_openai import ChatOpenAI\n\nfrom langflow.base.models.anthropic_constants import ANTHROPIC_MODELS\nfrom langflow.base.models.google_generative_ai_constants import GOOGLE_GENERATIVE_AI_MODELS\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_CHAT_MODEL_NAMES, OPENAI_REASONING_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageInput, MultilineInput, SecretStrInput, SliderInput\nfrom langflow.schema.dotdict import dotdict\n\n\nclass LanguageModelComponent(LCModelComponent):\n display_name = \"Language Model\"\n description = \"Runs a language model given a specified provider.\"\n documentation: str = \"https://docs.langflow.org/components-models\"\n icon = \"brain-circuit\"\n category = \"models\"\n priority = 0 # Set priority to 0 to make it appear first\n\n inputs = [\n DropdownInput(\n name=\"provider\",\n display_name=\"Model Provider\",\n options=[\"OpenAI\", \"Anthropic\", \"Google\"],\n value=\"OpenAI\",\n info=\"Select the model provider\",\n real_time_refresh=True,\n options_metadata=[{\"icon\": \"OpenAI\"}, {\"icon\": \"Anthropic\"}, {\"icon\": \"GoogleGenerativeAI\"}],\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES,\n value=OPENAI_CHAT_MODEL_NAMES[0],\n info=\"Select the model to use\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"Model Provider API key\",\n required=False,\n show=True,\n real_time_refresh=True,\n ),\n MessageInput(\n name=\"input_value\",\n display_name=\"Input\",\n info=\"The input text to send to the model\",\n ),\n MultilineInput(\n name=\"system_message\",\n display_name=\"System Message\",\n info=\"A system message that helps set the behavior of the assistant\",\n advanced=False,\n ),\n BoolInput(\n name=\"stream\",\n display_name=\"Stream\",\n info=\"Whether to stream the response\",\n value=False,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n info=\"Controls randomness in responses\",\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n ]\n\n def build_model(self) -> LanguageModel:\n provider = self.provider\n model_name = self.model_name\n temperature = self.temperature\n stream = self.stream\n\n if provider == \"OpenAI\":\n if not self.api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n\n if model_name in OPENAI_REASONING_MODEL_NAMES:\n # reasoning models do not support temperature (yet)\n temperature = None\n\n return ChatOpenAI(\n model_name=model_name,\n temperature=temperature,\n streaming=stream,\n openai_api_key=self.api_key,\n )\n if provider == \"Anthropic\":\n if not self.api_key:\n msg = \"Anthropic API key is required when using Anthropic provider\"\n raise ValueError(msg)\n return ChatAnthropic(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n anthropic_api_key=self.api_key,\n )\n if provider == \"Google\":\n if not self.api_key:\n msg = \"Google API key is required when using Google provider\"\n raise ValueError(msg)\n return ChatGoogleGenerativeAI(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n google_api_key=self.api_key,\n )\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n if field_name == \"provider\":\n if field_value == \"OpenAI\":\n build_config[\"model_name\"][\"options\"] = OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES\n build_config[\"model_name\"][\"value\"] = OPENAI_CHAT_MODEL_NAMES[0]\n build_config[\"api_key\"][\"display_name\"] = \"OpenAI API Key\"\n elif field_value == \"Anthropic\":\n build_config[\"model_name\"][\"options\"] = ANTHROPIC_MODELS\n build_config[\"model_name\"][\"value\"] = ANTHROPIC_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Anthropic API Key\"\n elif field_value == \"Google\":\n build_config[\"model_name\"][\"options\"] = GOOGLE_GENERATIVE_AI_MODELS\n build_config[\"model_name\"][\"value\"] = GOOGLE_GENERATIVE_AI_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Google API Key\"\n elif field_name == \"model_name\" and field_value.startswith(\"o1\") and self.provider == \"OpenAI\":\n # Hide system_message for o1 models - currently unsupported\n if \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = False\n elif field_name == \"model_name\" and not field_value.startswith(\"o1\") and \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = True\n return build_config\n" }, - "lexical_terms": { - "_input_type": "QueryInput", - "advanced": true, - "display_name": "Lexical Terms", + "input_value": { + "_input_type": "MessageInput", + "advanced": false, + "display_name": "Input", "dynamic": false, - "info": "Add additional terms/keywords to augment search precision.", + "info": "The input text to send to the model", "input_types": [ "Message" ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "lexical_terms", - "placeholder": "Enter terms to search...", - "required": false, - "separator": " ", - "show": false, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "query", - "value": "" - }, - "number_of_results": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Number of Search Results", - "dynamic": false, - "info": "Number of search results to return.", - "list": false, - "list_add_label": "Add More", - "name": "number_of_results", + "name": "input_value", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "int", - "value": 4 + "type": "str", + "value": "" }, - "reranker": { + "model_name": { "_input_type": "DropdownInput", "advanced": false, "combobox": false, "dialog_inputs": {}, - "display_name": "Reranker", + "display_name": "Model Name", "dynamic": false, - "info": "Post-retrieval model that re-scores results for optimal relevance ranking.", - "name": "reranker", - "options": [], + "info": "Select the model to use", + "name": "model_name", + "options": [ + "gpt-4o-mini", + "gpt-4o", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.5-preview", + "gpt-4-turbo", + "gpt-4-turbo-preview", + "gpt-4", + "gpt-3.5-turbo" + ], "options_metadata": [], "placeholder": "", "required": false, - "show": false, + "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "" + "value": "gpt-4o-mini" }, - "search_method": { + "provider": { "_input_type": "DropdownInput", - "advanced": true, + "advanced": false, "combobox": false, "dialog_inputs": {}, - "display_name": "Search Method", + "display_name": "Model Provider", "dynamic": false, - "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.", - "name": "search_method", + "info": "Select the model provider", + "name": "provider", "options": [ - "Hybrid Search", - "Vector Search" + "OpenAI", + "Anthropic", + "Google" + ], + "options_metadata": [ + { + "icon": "OpenAI" + }, + { + "icon": "Anthropic" + }, + { + "icon": "GoogleGenerativeAI" + } ], - "options_metadata": [], "placeholder": "", "real_time_refresh": true, "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "Vector Search" - }, - "search_query": { - "_input_type": "QueryInput", - "advanced": false, - "display_name": "Search Query", - "dynamic": false, - "info": "Enter a query to run a similarity search.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "search_query", - "placeholder": "Enter a query...", - "required": false, - "show": true, - "title_case": false, - "tool_mode": true, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "query", - "value": "" - }, - "search_score_threshold": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "Search Score Threshold", - "dynamic": false, - "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')", - "list": false, - "list_add_label": "Add More", - "name": "search_score_threshold", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "float", - "value": 0 + "value": "OpenAI" }, - "search_type": { - "_input_type": "DropdownInput", + "stream": { + "_input_type": "BoolInput", "advanced": true, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Search Type", - "dynamic": false, - "info": "Search type to use", - "name": "search_type", - "options": [ - "Similarity", - "Similarity with score threshold", - "MMR (Max Marginal Relevance)" - ], - "options_metadata": [], + "display_name": "Stream", + "dynamic": false, + "info": "Whether to stream the response", + "list": false, + "list_add_label": "Add More", + "name": "stream", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "Similarity" + "type": "bool", + "value": false }, - "should_cache_vector_store": { - "_input_type": "BoolInput", + "system_message": { + "_input_type": "MultilineInput", "advanced": true, - "display_name": "Cache Vector Store", + "copy_field": false, + "display_name": "System Message", "dynamic": false, - "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.", + "info": "A system message that helps set the behavior of the assistant", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", - "name": "should_cache_vector_store", + "load_from_db": false, + "multiline": true, + "name": "system_message", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "str", + "value": "" }, - "token": { - "_input_type": "SecretStrInput", - "advanced": false, - "display_name": "Astra DB Application Token", + "temperature": { + "_input_type": "SliderInput", + "advanced": true, + "display_name": "Temperature", "dynamic": false, - "info": "Authentication token for accessing Astra DB.", - "input_types": [], - "load_from_db": true, - "name": "token", - "password": true, + "info": "Controls randomness in responses", + "max_label": "", + "max_label_icon": "", + "min_label": "", + "min_label_icon": "", + "name": "temperature", "placeholder": "", - "real_time_refresh": true, - "required": true, + "range_spec": { + "max": 1, + "min": 0, + "step": 0.01, + "step_type": "float" + }, + "required": false, "show": true, + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, "title_case": false, - "type": "str", - "value": "ASTRA_DB_APPLICATION_TOKEN" + "tool_mode": false, + "type": "slider", + "value": 0.1 } }, "tool_mode": false }, - "selected_output": "dataframe", + "selected_output": "text_output", "showNode": true, - "type": "AstraDB" + "type": "LanguageModelComponent" }, "dragging": false, - "id": "AstraDB-JsRrT", + "id": "LanguageModelComponent-9vLb9", "measured": { - "height": 502, + "height": 451, "width": 320 }, "position": { - "x": 1206.2272993725155, - "y": 491.41485400844977 + "x": 2354.7612483129965, + "y": 633.8261067248878 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "AstraDB-W6NB4", + "id": "AstraDB-s5fQW", "node": { "base_classes": [ "Data", @@ -3569,6 +3289,7 @@ ], "frozen": false, "icon": "AstraDB", + "last_updated": "2025-08-26T16:31:17.471Z", "legacy": false, "metadata": { "code_hash": "23fbe9daca09", @@ -3593,7 +3314,7 @@ ], "total_dependencies": 4 }, - "module": "langflow.components.datastax.astradb.AstraDBVectorStoreComponent" + "module": "langflow.components.datastax.astradb_vectorstore.AstraDBVectorStoreComponent" }, "minimized": false, "output_types": [], @@ -3605,6 +3326,8 @@ "group_outputs": false, "method": "search_documents", "name": "search_results", + "options": null, + "required_inputs": null, "selected": "Data", "tool_mode": true, "types": [ @@ -3619,6 +3342,8 @@ "group_outputs": false, "method": "as_dataframe", "name": "dataframe", + "options": null, + "required_inputs": null, "selected": "DataFrame", "tool_mode": true, "types": [ @@ -3634,6 +3359,8 @@ "hidden": true, "method": "as_vector_store", "name": "vectorstoreconnection", + "options": null, + "required_inputs": null, "selected": "VectorStore", "tool_mode": true, "types": [ @@ -3665,19 +3392,21 @@ "value": {} }, "api_endpoint": { - "_input_type": "StrInput", - "advanced": false, + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, "display_name": "Astra DB API Endpoint", "dynamic": false, "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, "name": "api_endpoint", + "options": [], + "options_metadata": [], "placeholder": "", "required": false, - "show": false, + "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -3792,6 +3521,7 @@ "required": true, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -3813,6 +3543,7 @@ "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -3853,6 +3584,7 @@ "required": true, "show": false, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -3933,6 +3665,7 @@ "required": true, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -3953,6 +3686,7 @@ "required": true, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -3969,24 +3703,14 @@ "info": "The Database name for the Astra DB instance.", "name": "database_name", "options": [], - "options_metadata": [ - { - "api_endpoint": "https://5b8bb22c-4a38-4f0a-865c-a18ed7590bd1-us-east-2.apps.astra.datastax.com", - "collections": 5, - "keyspaces": [ - "default_keyspace", - "samples_dataflow" - ], - "org_id": "260f986d-e65c-4f05-94a3-7cebfcb867a3", - "status": null - } - ], + "options_metadata": [], "placeholder": "", "real_time_refresh": true, "refresh_button": true, "required": true, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -4025,7 +3749,7 @@ "name": "embedding_model", "placeholder": "", "required": false, - "show": true, + "show": false, "title_case": false, "trace_as_metadata": true, "type": "other", @@ -4051,6 +3775,7 @@ "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -4111,6 +3836,7 @@ "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -4118,7 +3844,7 @@ }, "lexical_terms": { "_input_type": "QueryInput", - "advanced": true, + "advanced": false, "display_name": "Lexical Terms", "dynamic": false, "info": "Add additional terms/keywords to augment search precision.", @@ -4173,6 +3899,7 @@ "required": false, "show": false, "title_case": false, + "toggle": true, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -4191,12 +3918,20 @@ "Hybrid Search", "Vector Search" ], - "options_metadata": [], + "options_metadata": [ + { + "icon": "SearchHybrid" + }, + { + "icon": "SearchVector" + } + ], "placeholder": "", "real_time_refresh": true, "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -4262,6 +3997,7 @@ "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", @@ -4311,63 +4047,136 @@ "type": "AstraDB" }, "dragging": false, - "id": "AstraDB-W6NB4", + "id": "AstraDB-s5fQW", "measured": { - "height": 502, + "height": 457, "width": 320 }, "position": { - "x": 2060.799531746744, - "y": 1507.872099528214 + "x": 2095.3200255891484, + "y": 1501.719729125222 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "File-vusZ2", + "id": "AstraDB-WAq6g", "node": { "base_classes": [ - "Message" + "Data", + "DataFrame", + "VectorStore" ], "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Loads content from one or more files as a DataFrame.", - "display_name": "File", - "documentation": "", + "description": "Ingest and search documents in Astra DB", + "display_name": "Astra DB", + "documentation": "https://docs.datastax.com/en/langflow/astra-components.html", "edited": false, "field_order": [ - "path", - "file_path", - "separator", - "silent_errors", - "delete_server_file_after_processing", - "ignore_unsupported_extensions", - "ignore_unspecified_files", - "use_multithreading", - "concurrency_multithreading" + "token", + "environment", + "database_name", + "api_endpoint", + "keyspace", + "collection_name", + "embedding_model", + "ingest_data", + "search_query", + "should_cache_vector_store", + "search_method", + "reranker", + "lexical_terms", + "number_of_results", + "search_type", + "search_score_threshold", + "advanced_search_filter", + "autodetect_collection", + "content_field", + "deletion_field", + "ignore_invalid_documents", + "astradb_vectorstore_kwargs" ], "frozen": false, - "icon": "file-text", + "icon": "AstraDB", + "last_updated": "2025-08-26T16:31:59.909Z", "legacy": false, - "metadata": {}, + "metadata": { + "code_hash": "23fbe9daca09", + "dependencies": { + "dependencies": [ + { + "name": "astrapy", + "version": "2.0.1" + }, + { + "name": "langchain_astradb", + "version": "0.6.0" + }, + { + "name": "langchain_core", + "version": "0.3.72" + }, + { + "name": "langflow", + "version": null + } + ], + "total_dependencies": 4 + }, + "module": "langflow.components.datastax.astradb_vectorstore.AstraDBVectorStoreComponent" + }, "minimized": false, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Raw Content", + "display_name": "Search Results", "group_outputs": false, - "method": "load_files_message", - "name": "message", + "method": "search_documents", + "name": "search_results", "options": null, "required_inputs": null, - "selected": "Message", + "selected": "Data", "tool_mode": true, "types": [ - "Message" + "Data" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "DataFrame", + "group_outputs": false, + "method": "as_dataframe", + "name": "dataframe", + "options": null, + "required_inputs": null, + "selected": "DataFrame", + "tool_mode": true, + "types": [ + "DataFrame" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Vector Store Connection", + "group_outputs": false, + "hidden": true, + "method": "as_vector_store", + "name": "vectorstoreconnection", + "options": null, + "required_inputs": null, + "selected": "VectorStore", + "tool_mode": true, + "types": [ + "VectorStore" ], "value": "__UNDEFINED__" } @@ -4375,51 +4184,74 @@ "pinned": false, "template": { "_type": "Component", - "code": { + "advanced_search_filter": { + "_input_type": "NestedDictInput", "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", + "display_name": "Search Metadata Filter", + "dynamic": false, + "info": "Optional dictionary of filters to apply to the search query.", "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, + "list_add_label": "Add More", + "name": "advanced_search_filter", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "NestedDict", + "value": {} + }, + "api_endpoint": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Astra DB API Endpoint", + "dynamic": false, + "info": "The API Endpoint for the Astra DB instance. Supercedes database selection.", + "name": "api_endpoint", + "options": [], + "options_metadata": [], "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, - "type": "code", - "value": "\"\"\"Enhanced file component v2 with mypy and ruff compliance.\"\"\"\n\nfrom __future__ import annotations\n\nfrom copy import deepcopy\nfrom enum import Enum\nfrom typing import TYPE_CHECKING, Any\n\nfrom langflow.base.data.base_file import BaseFileComponent\nfrom langflow.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n FileInput,\n IntInput,\n MessageTextInput,\n Output,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.message import Message\n\nif TYPE_CHECKING:\n from langflow.schema import DataFrame\n\n\nclass MockConversionStatus(Enum):\n \"\"\"Mock ConversionStatus for fallback compatibility.\"\"\"\n\n SUCCESS = \"success\"\n FAILURE = \"failure\"\n\n\nclass MockInputFormat(Enum):\n \"\"\"Mock InputFormat for fallback compatibility.\"\"\"\n\n PDF = \"pdf\"\n IMAGE = \"image\"\n\n\nclass MockImageRefMode(Enum):\n \"\"\"Mock ImageRefMode for fallback compatibility.\"\"\"\n\n PLACEHOLDER = \"placeholder\"\n EMBEDDED = \"embedded\"\n\n\nclass DoclingImports:\n \"\"\"Container for docling imports with type information.\"\"\"\n\n def __init__(\n self,\n conversion_status: type[Enum],\n input_format: type[Enum],\n document_converter: type,\n image_ref_mode: type[Enum],\n strategy: str,\n ) -> None:\n self.conversion_status = conversion_status\n self.input_format = input_format\n self.document_converter = document_converter\n self.image_ref_mode = image_ref_mode\n self.strategy = strategy\n\n\nclass FileComponent(BaseFileComponent):\n \"\"\"Enhanced file component v2 that combines standard file loading with optional Docling processing and export.\n\n This component supports all features of the standard File component, plus an advanced mode\n that enables Docling document processing and export to various formats (Markdown, HTML, etc.).\n \"\"\"\n\n display_name = \"File\"\n description = \"Loads content from files with optional advanced document processing and export using Docling.\"\n documentation: str = \"https://docs.langflow.org/components-data#file\"\n icon = \"file-text\"\n name = \"File\"\n\n # Docling supported formats from original component\n VALID_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"csv\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"docx\",\n \"htm\",\n \"html\",\n \"jpeg\",\n \"json\",\n \"md\",\n \"pdf\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"txt\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"xml\",\n \"webp\",\n *TEXT_FILE_TYPES,\n ]\n\n # Fixed export settings\n EXPORT_FORMAT = \"Markdown\"\n IMAGE_MODE = \"placeholder\"\n\n _base_inputs = deepcopy(BaseFileComponent._base_inputs)\n\n for input_item in _base_inputs:\n if isinstance(input_item, FileInput) and input_item.name == \"path\":\n input_item.real_time_refresh = True\n break\n\n inputs = [\n *_base_inputs,\n BoolInput(\n name=\"advanced_mode\",\n display_name=\"Advanced Parser\",\n value=False,\n real_time_refresh=True,\n info=(\n \"Enable advanced document processing and export with Docling for PDFs, images, and office documents. \"\n \"Available only for single file processing.\"\n ),\n show=False,\n ),\n DropdownInput(\n name=\"pipeline\",\n display_name=\"Pipeline\",\n info=\"Docling pipeline to use\",\n options=[\"standard\", \"vlm\"],\n value=\"standard\",\n advanced=True,\n ),\n DropdownInput(\n name=\"ocr_engine\",\n display_name=\"OCR Engine\",\n info=\"OCR engine to use. Only available when pipeline is set to 'standard'.\",\n options=[\"\", \"easyocr\"],\n value=\"\",\n show=False,\n advanced=True,\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder between pages in the markdown output.\",\n value=\"\",\n advanced=True,\n show=False,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n show=False,\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"[Deprecated] Use Multithreading\",\n advanced=True,\n value=True,\n info=\"Set 'Processing Concurrency' greater than 1 to enable multithreading.\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"When multiple files are being processed, the number of files to process concurrently.\",\n value=1,\n ),\n BoolInput(\n name=\"markdown\",\n display_name=\"Markdown Export\",\n info=\"Export processed documents to Markdown format. Only available when advanced mode is enabled.\",\n value=False,\n show=False,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n ]\n\n def _path_value(self, template) -> list[str]:\n # Get current path value\n return template.get(\"path\", {}).get(\"file_path\", [])\n\n def update_build_config(\n self,\n build_config: dict[str, Any],\n field_value: Any,\n field_name: str | None = None,\n ) -> dict[str, Any]:\n \"\"\"Update build configuration to show/hide fields based on file count and advanced_mode.\"\"\"\n if field_name == \"path\":\n # Get current path value\n path_value = self._path_value(build_config)\n file_path = path_value[0] if len(path_value) > 0 else \"\"\n\n # Show/hide Advanced Parser based on file count (only for single files)\n file_count = len(field_value) if field_value else 0\n if file_count == 1 and not file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n build_config[\"advanced_mode\"][\"show\"] = True\n else:\n build_config[\"advanced_mode\"][\"show\"] = False\n build_config[\"advanced_mode\"][\"value\"] = False # Reset to False when hidden\n\n # Hide all advanced fields when Advanced Parser is not available\n advanced_fields = [\n \"pipeline\",\n \"ocr_engine\",\n \"doc_key\",\n \"md_image_placeholder\",\n \"md_page_break_placeholder\",\n ]\n for field in advanced_fields:\n if field in build_config:\n build_config[field][\"show\"] = False\n\n elif field_name == \"advanced_mode\":\n # Show/hide advanced fields based on advanced_mode (only if single file)\n advanced_fields = [\n \"pipeline\",\n \"ocr_engine\",\n \"doc_key\",\n \"md_image_placeholder\",\n \"md_page_break_placeholder\",\n ]\n\n for field in advanced_fields:\n if field in build_config:\n build_config[field][\"show\"] = field_value\n\n return build_config\n\n def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]: # noqa: ARG002\n \"\"\"Dynamically show outputs based on the number of files and their types.\"\"\"\n if field_name not in [\"path\", \"advanced_mode\"]:\n return frontend_node\n\n # Add outputs based on the number of files in the path\n template = frontend_node.get(\"template\", {})\n path_value = self._path_value(template)\n if len(path_value) == 0:\n return frontend_node\n\n # Clear existing outputs\n frontend_node[\"outputs\"] = []\n\n if len(path_value) == 1:\n # We need to check if the file is structured content\n file_path = path_value[0] if field_name == \"path\" else frontend_node[\"template\"][\"path\"][\"file_path\"][0]\n if file_path.endswith((\".csv\", \".xlsx\", \".parquet\")):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"dataframe\", method=\"load_files_structured\"),\n )\n elif file_path.endswith(\".json\"):\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Content\", name=\"json\", method=\"load_files_json\"),\n )\n\n # Add outputs based on advanced mode\n advanced_mode = frontend_node.get(\"template\", {}).get(\"advanced_mode\", {}).get(\"value\", False)\n\n if advanced_mode:\n # Advanced mode: Structured Output, Markdown, and File Path\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Structured Output\", name=\"advanced\", method=\"load_files_advanced\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Markdown\", name=\"markdown\", method=\"load_files_markdown\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n # Normal mode: Raw Content and File Path\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Raw Content\", name=\"message\", method=\"load_files_message\"),\n )\n frontend_node[\"outputs\"].append(\n Output(display_name=\"File Path\", name=\"path\", method=\"load_files_path\"),\n )\n else:\n # For multiple files, we show the files output (DataFrame format)\n # Advanced Parser is not available for multiple files\n frontend_node[\"outputs\"].append(\n Output(display_name=\"Files\", name=\"dataframe\", method=\"load_files\"),\n )\n\n return frontend_node\n\n def _try_import_docling(self) -> DoclingImports | None:\n \"\"\"Try different import strategies for docling components.\"\"\"\n # Try strategy 1: Latest docling structure\n try:\n from docling.datamodel.base_models import ConversionStatus, InputFormat # type: ignore[import-untyped]\n from docling.document_converter import DocumentConverter # type: ignore[import-untyped]\n from docling_core.types.doc import ImageRefMode # type: ignore[import-untyped]\n\n self.log(\"Using latest docling import structure\")\n return DoclingImports(\n conversion_status=ConversionStatus,\n input_format=InputFormat,\n document_converter=DocumentConverter,\n image_ref_mode=ImageRefMode,\n strategy=\"latest\",\n )\n except ImportError as e:\n self.log(f\"Latest docling structure failed: {e}\")\n\n # Try strategy 2: Alternative import paths\n try:\n from docling.document_converter import DocumentConverter # type: ignore[import-untyped]\n from docling_core.types.doc import ImageRefMode # type: ignore[import-untyped]\n\n # Try to get ConversionStatus from different locations\n conversion_status: type[Enum] = MockConversionStatus\n input_format: type[Enum] = MockInputFormat\n\n try:\n from docling_core.types import ConversionStatus, InputFormat # type: ignore[import-untyped]\n\n conversion_status = ConversionStatus\n input_format = InputFormat\n except ImportError:\n try:\n from docling.datamodel import ConversionStatus, InputFormat # type: ignore[import-untyped]\n\n conversion_status = ConversionStatus\n input_format = InputFormat\n except ImportError:\n # Use mock enums if we can't find them\n pass\n\n self.log(\"Using alternative docling import structure\")\n return DoclingImports(\n conversion_status=conversion_status,\n input_format=input_format,\n document_converter=DocumentConverter,\n image_ref_mode=ImageRefMode,\n strategy=\"alternative\",\n )\n except ImportError as e:\n self.log(f\"Alternative docling structure failed: {e}\")\n\n # Try strategy 3: Basic converter only\n try:\n from docling.document_converter import DocumentConverter # type: ignore[import-untyped]\n\n self.log(\"Using basic docling import structure with mocks\")\n return DoclingImports(\n conversion_status=MockConversionStatus,\n input_format=MockInputFormat,\n document_converter=DocumentConverter,\n image_ref_mode=MockImageRefMode,\n strategy=\"basic\",\n )\n except ImportError as e:\n self.log(f\"Basic docling structure failed: {e}\")\n\n # Strategy 4: Complete fallback - return None to indicate failure\n return None\n\n def _create_advanced_converter(self, docling_imports: DoclingImports) -> Any:\n \"\"\"Create advanced converter with pipeline options if available.\"\"\"\n try:\n from docling.datamodel.pipeline_options import PdfPipelineOptions # type: ignore[import-untyped]\n from docling.document_converter import PdfFormatOption # type: ignore[import-untyped]\n\n document_converter = docling_imports.document_converter\n input_format = docling_imports.input_format\n\n # Create basic pipeline options\n pipeline_options = PdfPipelineOptions()\n\n # Configure OCR if specified and available\n if self.ocr_engine:\n try:\n from docling.models.factories import get_ocr_factory # type: ignore[import-untyped]\n\n pipeline_options.do_ocr = True\n ocr_factory = get_ocr_factory(allow_external_plugins=False)\n ocr_options = ocr_factory.create_options(kind=self.ocr_engine)\n pipeline_options.ocr_options = ocr_options\n self.log(f\"Configured OCR with engine: {self.ocr_engine}\")\n except Exception as e: # noqa: BLE001\n self.log(f\"Could not configure OCR: {e}, proceeding without OCR\")\n pipeline_options.do_ocr = False\n\n # Create format options\n pdf_format_option = PdfFormatOption(pipeline_options=pipeline_options)\n format_options = {}\n if hasattr(input_format, \"PDF\"):\n format_options[input_format.PDF] = pdf_format_option\n if hasattr(input_format, \"IMAGE\"):\n format_options[input_format.IMAGE] = pdf_format_option\n\n return document_converter(format_options=format_options)\n\n except Exception as e: # noqa: BLE001\n self.log(f\"Could not create advanced converter: {e}, using basic converter\")\n return docling_imports.document_converter()\n\n def _is_docling_compatible(self, file_path: str) -> bool:\n \"\"\"Check if file is compatible with Docling processing.\"\"\"\n # All VALID_EXTENSIONS are Docling compatible (except for TEXT_FILE_TYPES which may overlap)\n docling_extensions = [\n \".adoc\",\n \".asciidoc\",\n \".asc\",\n \".bmp\",\n \".csv\",\n \".dotx\",\n \".dotm\",\n \".docm\",\n \".docx\",\n \".htm\",\n \".html\",\n \".jpeg\",\n \".json\",\n \".md\",\n \".pdf\",\n \".png\",\n \".potx\",\n \".ppsx\",\n \".pptm\",\n \".potm\",\n \".ppsm\",\n \".pptx\",\n \".tiff\",\n \".txt\",\n \".xls\",\n \".xlsx\",\n \".xhtml\",\n \".xml\",\n \".webp\",\n ]\n return any(file_path.lower().endswith(ext) for ext in docling_extensions)\n\n def process_files(\n self,\n file_list: list[BaseFileComponent.BaseFile],\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process files using standard parsing or Docling based on advanced_mode and file type.\"\"\"\n\n def process_file_standard(file_path: str, *, silent_errors: bool = False) -> Data | None:\n \"\"\"Process a single file using standard text parsing.\"\"\"\n try:\n return parse_text_file_to_data(file_path, silent_errors=silent_errors)\n except FileNotFoundError as e:\n msg = f\"File not found: {file_path}. Error: {e}\"\n self.log(msg)\n if not silent_errors:\n raise\n return None\n except Exception as e:\n msg = f\"Unexpected error processing {file_path}: {e}\"\n self.log(msg)\n if not silent_errors:\n raise\n return None\n\n def process_file_docling(file_path: str, *, silent_errors: bool = False) -> Data | None:\n \"\"\"Process a single file using Docling if compatible, otherwise standard processing.\"\"\"\n # Try Docling first if file is compatible and advanced mode is enabled\n try:\n return self._process_with_docling_and_export(file_path)\n except Exception as e: # noqa: BLE001\n self.log(f\"Docling processing failed for {file_path}: {e}, falling back to standard processing\")\n if not silent_errors:\n # Return error data instead of raising\n return Data(data={\"error\": f\"Docling processing failed: {e}\", \"file_path\": file_path})\n\n return None\n\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n file_path = str(file_list[0].path)\n if self.advanced_mode and self._is_docling_compatible(file_path):\n processed_data = process_file_docling(file_path)\n if not processed_data:\n msg = f\"Failed to process file with Docling: {file_path}\"\n raise ValueError(msg)\n\n # Serialize processed data to match Data structure\n serialized_data = processed_data.serialize_model()\n\n # Now, if doc is nested, we need to unravel it\n clean_data: list[Data | None] = [processed_data]\n\n # This is where we've manually processed the data\n try:\n if \"exported_content\" not in serialized_data:\n clean_data = [\n Data(\n data={\n \"file_path\": file_path,\n **(\n item[\"element\"]\n if \"element\" in item\n else {k: v for k, v in item.items() if k != \"file_path\"}\n ),\n }\n )\n for item in serialized_data[\"doc\"]\n ]\n except Exception as _: # noqa: BLE001\n raise ValueError(serialized_data) from None\n\n # Repeat file_list to match the number of processed data elements\n final_data: list[Data | None] = clean_data\n return self.rollup_data(file_list, final_data)\n\n concurrency = 1 if not self.use_multithreading else max(1, self.concurrency_multithreading)\n file_count = len(file_list)\n\n self.log(f\"Starting parallel processing of {file_count} files with concurrency: {concurrency}.\")\n file_paths = [str(file.path) for file in file_list]\n my_data = parallel_load_data(\n file_paths,\n silent_errors=self.silent_errors,\n load_function=process_file_standard,\n max_concurrency=concurrency,\n )\n\n return self.rollup_data(file_list, my_data)\n\n def load_files_advanced(self) -> DataFrame:\n \"\"\"Load files using advanced Docling processing and export to an advanced format.\"\"\"\n # TODO: Update\n self.markdown = False\n return self.load_files()\n\n def load_files_markdown(self) -> Message:\n \"\"\"Load files using advanced Docling processing and export to Markdown format.\"\"\"\n self.markdown = True\n result = self.load_files()\n return Message(text=str(result.text[0]))\n\n def _process_with_docling_and_export(self, file_path: str) -> Data:\n \"\"\"Process a single file with Docling and export to the specified format.\"\"\"\n # Import docling components only when needed\n docling_imports = self._try_import_docling()\n\n if docling_imports is None:\n msg = \"Docling not available for advanced processing\"\n raise ImportError(msg)\n\n conversion_status = docling_imports.conversion_status\n document_converter = docling_imports.document_converter\n image_ref_mode = docling_imports.image_ref_mode\n\n try:\n # Create converter based on strategy and pipeline setting\n if docling_imports.strategy == \"latest\" and self.pipeline == \"standard\":\n converter = self._create_advanced_converter(docling_imports)\n else:\n # Use basic converter for compatibility\n converter = document_converter()\n self.log(\"Using basic DocumentConverter for Docling processing\")\n\n # Process single file\n result = converter.convert(file_path)\n\n # Check if conversion was successful\n success = False\n if hasattr(result, \"status\"):\n if hasattr(conversion_status, \"SUCCESS\"):\n success = result.status == conversion_status.SUCCESS\n else:\n success = str(result.status).lower() == \"success\"\n elif hasattr(result, \"document\"):\n # If no status but has document, assume success\n success = result.document is not None\n\n if not success:\n return Data(data={\"error\": \"Docling conversion failed\", \"file_path\": file_path})\n\n if self.markdown:\n self.log(\"Exporting document to Markdown format\")\n # Export the document to the specified format\n exported_content = self._export_document(result.document, image_ref_mode)\n\n return Data(\n text=exported_content,\n data={\n \"exported_content\": exported_content,\n \"export_format\": self.EXPORT_FORMAT,\n \"file_path\": file_path,\n },\n )\n\n return Data(\n data={\n \"doc\": self.docling_to_dataframe_simple(result.document.export_to_dict()),\n \"export_format\": self.EXPORT_FORMAT,\n \"file_path\": file_path,\n }\n )\n\n except Exception as e: # noqa: BLE001\n return Data(data={\"error\": f\"Docling processing error: {e!s}\", \"file_path\": file_path})\n\n def docling_to_dataframe_simple(self, doc):\n \"\"\"Extract all text elements into a simple DataFrame.\"\"\"\n return [\n {\n \"page_no\": text[\"prov\"][0][\"page_no\"] if text[\"prov\"] else None,\n \"label\": text[\"label\"],\n \"text\": text[\"text\"],\n \"level\": text.get(\"level\", None), # for headers\n }\n for text in doc[\"texts\"]\n ]\n\n def _export_document(self, document: Any, image_ref_mode: type[Enum]) -> str:\n \"\"\"Export document to Markdown format with placeholder images.\"\"\"\n try:\n image_mode = (\n image_ref_mode(self.IMAGE_MODE) if hasattr(image_ref_mode, self.IMAGE_MODE) else self.IMAGE_MODE\n )\n\n # Always export to Markdown since it's fixed\n return document.export_to_markdown(\n image_mode=image_mode,\n image_placeholder=self.md_image_placeholder,\n page_break_placeholder=self.md_page_break_placeholder,\n )\n\n except Exception as e: # noqa: BLE001\n self.log(f\"Markdown export failed: {e}, using basic text export\")\n # Fallback to basic text export\n try:\n return document.export_to_text()\n except Exception: # noqa: BLE001\n return str(document)\n" + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" }, - "concurrency_multithreading": { - "_input_type": "IntInput", + "astradb_vectorstore_kwargs": { + "_input_type": "NestedDictInput", "advanced": true, - "display_name": "Processing Concurrency", + "display_name": "AstraDBVectorStore Parameters", "dynamic": false, - "info": "When multiple files are being processed, the number of files to process concurrently.", + "info": "Optional dictionary of additional parameters for the AstraDBVectorStore.", "list": false, "list_add_label": "Add More", - "name": "concurrency_multithreading", + "name": "astradb_vectorstore_kwargs", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "int", - "value": 1 + "type": "NestedDict", + "value": {} }, - "delete_server_file_after_processing": { + "autodetect_collection": { "_input_type": "BoolInput", "advanced": true, - "display_name": "Delete Server File After Processing", + "display_name": "Autodetect Collection", "dynamic": false, - "info": "If true, the Server File Path will be deleted after processing.", + "info": "Boolean flag to determine whether to autodetect the collection.", "list": false, "list_add_label": "Add More", - "name": "delete_server_file_after_processing", + "name": "autodetect_collection", "placeholder": "", "required": false, "show": true, @@ -4429,136 +4261,347 @@ "type": "bool", "value": true }, - "file_path": { - "_input_type": "HandleInput", + "code": { "advanced": true, - "display_name": "Server File Path", + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + }, + "collection_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": true, + "dialog_inputs": { + "fields": { + "data": { + "node": { + "description": "Please allow several seconds for creation to complete.", + "display_name": "Create new collection", + "field_order": [ + "01_new_collection_name", + "02_embedding_generation_provider", + "03_embedding_generation_model", + "04_dimension" + ], + "name": "create_collection", + "template": { + "01_new_collection_name": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Name", + "dynamic": false, + "info": "Name of the new collection to create in Astra DB.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "new_collection_name", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "02_embedding_generation_provider": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Embedding generation method", + "dynamic": false, + "helper_text": "To create collections with more embedding provider options, go to your database in Astra DB", + "info": "Provider to use for generating embeddings.", + "name": "embedding_generation_provider", + "options": [], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "03_embedding_generation_model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Embedding model", + "dynamic": false, + "info": "Model to use for generating embeddings.", + "name": "embedding_generation_model", + "options": [], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "04_dimension": { + "_input_type": "IntInput", + "advanced": false, + "display_name": "Dimensions", + "dynamic": false, + "info": "Dimensions of the embeddings to generate.", + "list": false, + "list_add_label": "Add More", + "name": "dimension", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int" + } + } + } + } + }, + "functionality": "create" + }, + "display_name": "Collection", "dynamic": false, - "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.", - "input_types": [ - "Data", - "Message" - ], - "list": true, + "info": "The name of the collection within Astra DB where the vectors will be stored.", + "name": "collection_name", + "options": [], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "refresh_button": true, + "required": true, + "show": false, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "content_field": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Content Field", + "dynamic": false, + "info": "Field to use as the text content field for the vector store.", + "list": false, "list_add_label": "Add More", - "name": "file_path", + "load_from_db": false, + "name": "content_field", "placeholder": "", "required": false, "show": true, "title_case": false, + "tool_mode": false, "trace_as_metadata": true, - "type": "other", + "type": "str", "value": "" }, - "ignore_unspecified_files": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Ignore Unspecified Files", + "database_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": true, + "dialog_inputs": { + "fields": { + "data": { + "node": { + "description": "Please allow several minutes for creation to complete.", + "display_name": "Create new database", + "field_order": [ + "01_new_database_name", + "02_cloud_provider", + "03_region" + ], + "name": "create_database", + "template": { + "01_new_database_name": { + "_input_type": "StrInput", + "advanced": false, + "display_name": "Name", + "dynamic": false, + "info": "Name of the new database to create in Astra DB.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "new_database_name", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "02_cloud_provider": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Cloud provider", + "dynamic": false, + "info": "Cloud provider for the new database.", + "name": "cloud_provider", + "options": [ + "Amazon Web Services", + "Google Cloud Platform", + "Microsoft Azure" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "03_region": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Region", + "dynamic": false, + "info": "Region for the new database.", + "name": "region", + "options": [], + "options_metadata": [], + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + } + } + } + } + }, + "functionality": "create" + }, + "display_name": "Database", "dynamic": false, - "info": "If true, Data with no 'file_path' property will be ignored.", - "list": false, - "list_add_label": "Add More", - "name": "ignore_unspecified_files", + "info": "The Database name for the Astra DB instance.", + "name": "database_name", + "options": [], + "options_metadata": [], "placeholder": "", - "required": false, + "real_time_refresh": true, + "refresh_button": true, + "required": true, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, - "type": "bool", - "value": false + "type": "str", + "value": "" }, - "ignore_unsupported_extensions": { - "_input_type": "BoolInput", + "deletion_field": { + "_input_type": "StrInput", "advanced": true, - "display_name": "Ignore Unsupported Extensions", + "display_name": "Deletion Based On Field", "dynamic": false, - "info": "If true, files with unsupported extensions will not be processed.", + "info": "When this parameter is provided, documents in the target collection with metadata field values matching the input metadata field value will be deleted before new data is loaded.", "list": false, "list_add_label": "Add More", - "name": "ignore_unsupported_extensions", + "load_from_db": false, + "name": "deletion_field", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "str", + "value": "" }, - "path": { - "_input_type": "FileInput", + "embedding_model": { + "_input_type": "HandleInput", "advanced": false, - "display_name": "Files", + "display_name": "Embedding Model", "dynamic": false, - "fileTypes": [ - "txt", - "md", - "mdx", - "csv", - "json", - "yaml", - "yml", - "xml", - "html", - "htm", - "pdf", - "docx", - "py", - "sh", - "sql", - "js", - "ts", - "tsx", - "zip", - "tar", - "tgz", - "bz2", - "gz" + "info": "Specify the Embedding Model. Not required for Astra Vectorize collections.", + "input_types": [ + "Embeddings" ], - "file_path": [], - "info": "Supported file extensions: txt, md, mdx, csv, json, yaml, yml, xml, html, htm, pdf, docx, py, sh, sql, js, ts, tsx; optionally bundled in file extensions: zip, tar, tgz, bz2, gz", - "list": true, + "list": false, "list_add_label": "Add More", - "name": "path", + "name": "embedding_model", "placeholder": "", - "real_time_refresh": true, "required": false, - "show": true, - "temp_file": false, + "show": false, "title_case": false, "trace_as_metadata": true, - "type": "file", + "type": "other", "value": "" }, - "separator": { - "_input_type": "StrInput", + "environment": { + "_input_type": "DropdownInput", "advanced": true, - "display_name": "Separator", + "combobox": true, + "dialog_inputs": {}, + "display_name": "Environment", "dynamic": false, - "info": "Specify the separator to use between multiple outputs in Message format.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "separator", + "info": "The environment for the Astra DB API Endpoint.", + "name": "environment", + "options": [ + "prod", + "test", + "dev" + ], + "options_metadata": [], "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "\n\n" + "value": "prod" }, - "silent_errors": { + "ignore_invalid_documents": { "_input_type": "BoolInput", "advanced": true, - "display_name": "Silent Errors", + "display_name": "Ignore Invalid Documents", "dynamic": false, - "info": "If true, errors will not raise an exception.", + "info": "Boolean flag to determine whether to ignore invalid documents at runtime.", "list": false, "list_add_label": "Add More", - "name": "silent_errors", + "name": "ignore_invalid_documents", "placeholder": "", "required": false, "show": true, @@ -4568,232 +4611,131 @@ "type": "bool", "value": false }, - "use_multithreading": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "[Deprecated] Use Multithreading", - "dynamic": false, - "info": "Set 'Processing Concurrency' greater than 1 to enable multithreading.", - "list": false, - "list_add_label": "Add More", - "name": "use_multithreading", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - } - }, - "tool_mode": false - }, - "showNode": true, - "type": "File" - }, - "dragging": false, - "id": "File-vusZ2", - "measured": { - "height": 230, - "width": 320 - }, - "position": { - "x": 1330.7650978046952, - "y": 1431.5905495627503 - }, - "selected": false, - "type": "genericNode" - }, - { - "data": { - "id": "LanguageModelComponent-1uhUK", - "node": { - "base_classes": [ - "LanguageModel", - "Message" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Runs a language model given a specified provider. ", - "display_name": "Language Model", - "documentation": "", - "edited": false, - "field_order": [ - "provider", - "model_name", - "api_key", - "input_value", - "system_message", - "stream", - "temperature" - ], - "frozen": false, - "icon": "brain-circuit", - "legacy": false, - "metadata": { - "keywords": [ - "model", - "llm", - "language model", - "large language model" - ] - }, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "Model Response", - "group_outputs": false, - "method": "text_response", - "name": "text_output", - "options": null, - "required_inputs": null, - "selected": "Message", - "tool_mode": true, - "types": [ - "Message" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "Language Model", - "group_outputs": false, - "method": "build_model", - "name": "model_output", - "options": null, - "required_inputs": null, - "selected": "LanguageModel", - "tool_mode": true, - "types": [ - "LanguageModel" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "priority": 0, - "template": { - "_type": "Component", - "api_key": { - "_input_type": "SecretStrInput", + "ingest_data": { + "_input_type": "HandleInput", "advanced": false, - "display_name": "OpenAI API Key", + "display_name": "Ingest Data", "dynamic": false, - "info": "Model Provider API key", - "input_types": [], - "load_from_db": true, - "name": "api_key", - "password": true, + "info": "", + "input_types": [ + "Data", + "DataFrame" + ], + "list": true, + "list_add_label": "Add More", + "name": "ingest_data", "placeholder": "", - "real_time_refresh": true, "required": false, "show": true, "title_case": false, - "type": "str", - "value": "OPENAI_API_KEY" + "trace_as_metadata": true, + "type": "other", + "value": "" }, - "code": { + "keyspace": { + "_input_type": "DropdownInput", "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Keyspace", + "dynamic": false, + "info": "Optional keyspace within Astra DB to use for the collection.", + "name": "keyspace", + "options": [], + "options_metadata": [], "placeholder": "", - "required": true, + "real_time_refresh": true, + "required": false, "show": true, "title_case": false, - "type": "code", - "value": "from typing import Any\n\nfrom langchain_anthropic import ChatAnthropic\nfrom langchain_google_genai import ChatGoogleGenerativeAI\nfrom langchain_openai import ChatOpenAI\n\nfrom langflow.base.models.anthropic_constants import ANTHROPIC_MODELS\nfrom langflow.base.models.google_generative_ai_constants import GOOGLE_GENERATIVE_AI_MODELS\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.openai_constants import OPENAI_CHAT_MODEL_NAMES, OPENAI_REASONING_MODEL_NAMES\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput\nfrom langflow.io import DropdownInput, MessageInput, MultilineInput, SecretStrInput, SliderInput\nfrom langflow.schema.dotdict import dotdict\n\n\nclass LanguageModelComponent(LCModelComponent):\n display_name = \"Language Model\"\n description = \"Runs a language model given a specified provider.\"\n documentation: str = \"https://docs.langflow.org/components-models\"\n icon = \"brain-circuit\"\n category = \"models\"\n priority = 0 # Set priority to 0 to make it appear first\n\n inputs = [\n DropdownInput(\n name=\"provider\",\n display_name=\"Model Provider\",\n options=[\"OpenAI\", \"Anthropic\", \"Google\"],\n value=\"OpenAI\",\n info=\"Select the model provider\",\n real_time_refresh=True,\n options_metadata=[{\"icon\": \"OpenAI\"}, {\"icon\": \"Anthropic\"}, {\"icon\": \"GoogleGenerativeAI\"}],\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES,\n value=OPENAI_CHAT_MODEL_NAMES[0],\n info=\"Select the model to use\",\n real_time_refresh=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"OpenAI API Key\",\n info=\"Model Provider API key\",\n required=False,\n show=True,\n real_time_refresh=True,\n ),\n MessageInput(\n name=\"input_value\",\n display_name=\"Input\",\n info=\"The input text to send to the model\",\n ),\n MultilineInput(\n name=\"system_message\",\n display_name=\"System Message\",\n info=\"A system message that helps set the behavior of the assistant\",\n advanced=False,\n ),\n BoolInput(\n name=\"stream\",\n display_name=\"Stream\",\n info=\"Whether to stream the response\",\n value=False,\n advanced=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n info=\"Controls randomness in responses\",\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n ]\n\n def build_model(self) -> LanguageModel:\n provider = self.provider\n model_name = self.model_name\n temperature = self.temperature\n stream = self.stream\n\n if provider == \"OpenAI\":\n if not self.api_key:\n msg = \"OpenAI API key is required when using OpenAI provider\"\n raise ValueError(msg)\n\n if model_name in OPENAI_REASONING_MODEL_NAMES:\n # reasoning models do not support temperature (yet)\n temperature = None\n\n return ChatOpenAI(\n model_name=model_name,\n temperature=temperature,\n streaming=stream,\n openai_api_key=self.api_key,\n )\n if provider == \"Anthropic\":\n if not self.api_key:\n msg = \"Anthropic API key is required when using Anthropic provider\"\n raise ValueError(msg)\n return ChatAnthropic(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n anthropic_api_key=self.api_key,\n )\n if provider == \"Google\":\n if not self.api_key:\n msg = \"Google API key is required when using Google provider\"\n raise ValueError(msg)\n return ChatGoogleGenerativeAI(\n model=model_name,\n temperature=temperature,\n streaming=stream,\n google_api_key=self.api_key,\n )\n msg = f\"Unknown provider: {provider}\"\n raise ValueError(msg)\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n if field_name == \"provider\":\n if field_value == \"OpenAI\":\n build_config[\"model_name\"][\"options\"] = OPENAI_CHAT_MODEL_NAMES + OPENAI_REASONING_MODEL_NAMES\n build_config[\"model_name\"][\"value\"] = OPENAI_CHAT_MODEL_NAMES[0]\n build_config[\"api_key\"][\"display_name\"] = \"OpenAI API Key\"\n elif field_value == \"Anthropic\":\n build_config[\"model_name\"][\"options\"] = ANTHROPIC_MODELS\n build_config[\"model_name\"][\"value\"] = ANTHROPIC_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Anthropic API Key\"\n elif field_value == \"Google\":\n build_config[\"model_name\"][\"options\"] = GOOGLE_GENERATIVE_AI_MODELS\n build_config[\"model_name\"][\"value\"] = GOOGLE_GENERATIVE_AI_MODELS[0]\n build_config[\"api_key\"][\"display_name\"] = \"Google API Key\"\n elif field_name == \"model_name\" and field_value.startswith(\"o1\") and self.provider == \"OpenAI\":\n # Hide system_message for o1 models - currently unsupported\n if \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = False\n elif field_name == \"model_name\" and not field_value.startswith(\"o1\") and \"system_message\" in build_config:\n build_config[\"system_message\"][\"show\"] = True\n return build_config\n" + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" }, - "input_value": { - "_input_type": "MessageInput", + "lexical_terms": { + "_input_type": "QueryInput", "advanced": false, - "display_name": "Input", + "display_name": "Lexical Terms", "dynamic": false, - "info": "The input text to send to the model", + "info": "Add additional terms/keywords to augment search precision.", "input_types": [ "Message" ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "input_value", - "placeholder": "", + "name": "lexical_terms", + "placeholder": "Enter terms to search...", "required": false, - "show": true, + "separator": " ", + "show": false, "title_case": false, "tool_mode": false, "trace_as_input": true, "trace_as_metadata": true, - "type": "str", + "type": "query", "value": "" }, - "model_name": { + "number_of_results": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Number of Search Results", + "dynamic": false, + "info": "Number of search results to return.", + "list": false, + "list_add_label": "Add More", + "name": "number_of_results", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 4 + }, + "reranker": { "_input_type": "DropdownInput", "advanced": false, "combobox": false, "dialog_inputs": {}, - "display_name": "Model Name", + "display_name": "Reranker", "dynamic": false, - "info": "Select the model to use", - "name": "model_name", - "options": [ - "gpt-4o-mini", - "gpt-4o", - "gpt-4.1", - "gpt-4.1-mini", - "gpt-4.1-nano", - "gpt-4.5-preview", - "gpt-4-turbo", - "gpt-4-turbo-preview", - "gpt-4", - "gpt-3.5-turbo" - ], + "info": "Post-retrieval model that re-scores results for optimal relevance ranking.", + "name": "reranker", + "options": [], "options_metadata": [], "placeholder": "", "required": false, - "show": true, + "show": false, "title_case": false, - "toggle": false, + "toggle": true, "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "gpt-4o-mini" + "value": "" }, - "provider": { + "search_method": { "_input_type": "DropdownInput", - "advanced": false, + "advanced": true, "combobox": false, "dialog_inputs": {}, - "display_name": "Model Provider", + "display_name": "Search Method", "dynamic": false, - "info": "Select the model provider", - "name": "provider", + "info": "Determine how your content is matched: Vector finds semantic similarity, and Hybrid Search (suggested) combines both approaches with a reranker.", + "name": "search_method", "options": [ - "OpenAI", - "Anthropic", - "Google" + "Hybrid Search", + "Vector Search" ], "options_metadata": [ { - "icon": "OpenAI" - }, - { - "icon": "Anthropic" + "icon": "SearchHybrid" }, { - "icon": "GoogleGenerativeAI" + "icon": "SearchVector" } ], "placeholder": "", @@ -4805,111 +4747,142 @@ "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "OpenAI" + "value": "Vector Search" }, - "stream": { - "_input_type": "BoolInput", + "search_query": { + "_input_type": "QueryInput", + "advanced": false, + "display_name": "Search Query", + "dynamic": false, + "info": "Enter a query to run a similarity search.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "search_query", + "placeholder": "Enter a query...", + "required": false, + "show": true, + "title_case": false, + "tool_mode": true, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "query", + "value": "" + }, + "search_score_threshold": { + "_input_type": "FloatInput", "advanced": true, - "display_name": "Stream", + "display_name": "Search Score Threshold", "dynamic": false, - "info": "Whether to stream the response", + "info": "Minimum similarity score threshold for search results. (when using 'Similarity with score threshold')", "list": false, "list_add_label": "Add More", - "name": "stream", + "name": "search_score_threshold", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "bool", - "value": false + "type": "float", + "value": 0 }, - "system_message": { - "_input_type": "MultilineInput", + "search_type": { + "_input_type": "DropdownInput", "advanced": true, - "copy_field": false, - "display_name": "System Message", + "combobox": false, + "dialog_inputs": {}, + "display_name": "Search Type", "dynamic": false, - "info": "A system message that helps set the behavior of the assistant", - "input_types": [ - "Message" + "info": "Search type to use", + "name": "search_type", + "options": [ + "Similarity", + "Similarity with score threshold", + "MMR (Max Marginal Relevance)" ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "multiline": true, - "name": "system_message", + "options_metadata": [], "placeholder": "", "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "" + "value": "Similarity" }, - "temperature": { - "_input_type": "SliderInput", + "should_cache_vector_store": { + "_input_type": "BoolInput", "advanced": true, - "display_name": "Temperature", + "display_name": "Cache Vector Store", "dynamic": false, - "info": "Controls randomness in responses", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "temperature", + "info": "If True, the vector store will be cached for the current build of the component. This is useful for components that have multiple output methods and want to share the same vector store.", + "list": false, + "list_add_label": "Add More", + "name": "should_cache_vector_store", "placeholder": "", - "range_spec": { - "max": 1, - "min": 0, - "step": 0.01, - "step_type": "float" - }, "required": false, "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, "title_case": false, "tool_mode": false, - "type": "slider", - "value": 0.1 + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "token": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Astra DB Application Token", + "dynamic": false, + "info": "Authentication token for accessing Astra DB.", + "input_types": [], + "load_from_db": true, + "name": "token", + "password": true, + "placeholder": "", + "real_time_refresh": true, + "required": true, + "show": true, + "title_case": false, + "type": "str", + "value": "ASTRA_DB_APPLICATION_TOKEN" } }, "tool_mode": false }, - "selected_output": "text_output", + "selected_output": "search_results", "showNode": true, - "type": "LanguageModelComponent" + "type": "AstraDB" }, "dragging": false, - "id": "LanguageModelComponent-1uhUK", + "id": "AstraDB-WAq6g", "measured": { - "height": 451, + "height": 457, "width": 320 }, "position": { - "x": 2354.7612483129965, - "y": 633.8261067248878 + "x": 1208.3141828517844, + "y": 539.6819126252909 }, "selected": false, "type": "genericNode" } ], "viewport": { - "x": -22.84629031494228, - "y": -151.44728538879235, - "zoom": 0.45963552948592706 + "x": -129.50701026456488, + "y": -80.5782691570264, + "zoom": 0.46801321898737636 } }, "description": "Load your data for chat context with Retrieval Augmented Generation.", "endpoint_name": null, - "id": "b65cafc6-9f8e-4137-ad2b-4c663822f512", + "id": "b59cc5b1-9dae-4064-bfea-08a20b9d7080", "is_component": false, - "last_tested_version": "1.4.3", + "last_tested_version": "1.5.0.post2", "name": "Vector Store RAG", "tags": [ "openai", From a606a94fa756bdd546060a7b30cd6d793c38bcb4 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 26 Aug 2025 16:39:21 +0000 Subject: [PATCH 2/4] [autofix.ci] apply automated fixes --- .../starter_projects/Hybrid Search RAG.json | 45 ++++++++----------- .../starter_projects/Vector Store RAG.json | 44 +++++++----------- 2 files changed, 35 insertions(+), 54 deletions(-) diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json index e390d2feb385..f7e3ac865e0e 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Hybrid Search RAG.json @@ -27,9 +27,9 @@ "id": "reactflow__edge-ParserComponent-0KvmM{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-0KvmMœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-zViXc{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-zViXcœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", "selected": false, "source": "ParserComponent-0KvmM", - "sourceHandle": "{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-0KvmMœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-0KvmMœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", "target": "ChatOutput-zViXc", - "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-zViXcœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-zViXcœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -55,9 +55,9 @@ "id": "reactflow__edge-LanguageModelComponent-CRZxx{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-CRZxxœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-StructuredOutput-AUzID{œfieldNameœ:œinput_valueœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, "source": "LanguageModelComponent-CRZxx", - "sourceHandle": "{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-CRZxxœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-CRZxxœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", "target": "StructuredOutput-AUzID", - "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œStructuredOutput-AUzIDœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -84,9 +84,9 @@ "id": "reactflow__edge-StructuredOutput-AUzID{œdataTypeœ:œStructuredOutputœ,œidœ:œStructuredOutput-AUzIDœ,œnameœ:œstructured_outputœ,œoutput_typesœ:[œDataœ]}-ParserComponent-6wYbr{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-6wYbrœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "selected": false, "source": "StructuredOutput-AUzID", - "sourceHandle": "{œdataTypeœ:œStructuredOutputœ,œidœ:œStructuredOutput-AUzIDœ,œnameœ:œstructured_outputœ,œoutput_typesœ:[œDataœ]}", + "sourceHandle": "{œdataTypeœ: œStructuredOutputœ, œidœ: œStructuredOutput-AUzIDœ, œnameœ: œstructured_outputœ, œoutput_typesœ: [œDataœ]}", "target": "ParserComponent-6wYbr", - "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-6wYbrœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" + "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-6wYbrœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -112,9 +112,9 @@ "id": "reactflow__edge-LanguageModelComponent-MD9V5{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-MD9V5œ,œnameœ:œmodel_outputœ,œoutput_typesœ:[œLanguageModelœ]}-StructuredOutput-AUzID{œfieldNameœ:œllmœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œLanguageModelœ],œtypeœ:œotherœ}", "selected": false, "source": "LanguageModelComponent-MD9V5", - "sourceHandle": "{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-MD9V5œ,œnameœ:œmodel_outputœ,œoutput_typesœ:[œLanguageModelœ]}", + "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-MD9V5œ, œnameœ: œmodel_outputœ, œoutput_typesœ: [œLanguageModelœ]}", "target": "StructuredOutput-AUzID", - "targetHandle": "{œfieldNameœ:œllmœ,œidœ:œStructuredOutput-AUzIDœ,œinputTypesœ:[œLanguageModelœ],œtypeœ:œotherœ}" + "targetHandle": "{œfieldNameœ: œllmœ, œidœ: œStructuredOutput-AUzIDœ, œinputTypesœ: [œLanguageModelœ], œtypeœ: œotherœ}" }, { "className": "", @@ -138,9 +138,9 @@ }, "id": "xy-edge__ParserComponent-6wYbr{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-6wYbrœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-AstraDB-93cal{œfieldNameœ:œlexical_termsœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", "source": "ParserComponent-6wYbr", - "sourceHandle": "{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-6wYbrœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-6wYbrœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", "target": "AstraDB-93cal", - "targetHandle": "{œfieldNameœ:œlexical_termsœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}" + "targetHandle": "{œfieldNameœ: œlexical_termsœ, œidœ: œAstraDB-93calœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}" }, { "className": "", @@ -164,9 +164,9 @@ }, "id": "xy-edge__ChatInput-2JUiB{œdataTypeœ:œChatInputœ,œidœ:œChatInput-2JUiBœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-93cal{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", "source": "ChatInput-2JUiB", - "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-2JUiBœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-2JUiBœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", "target": "AstraDB-93cal", - "targetHandle": "{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-93calœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}" + "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-93calœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}" }, { "className": "", @@ -191,9 +191,9 @@ }, "id": "xy-edge__AstraDB-93cal{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-93calœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-ParserComponent-0KvmM{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-0KvmMœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "source": "AstraDB-93cal", - "sourceHandle": "{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-93calœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}", + "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-93calœ, œnameœ: œsearch_resultsœ, œoutput_typesœ: [œDataœ]}", "target": "ParserComponent-0KvmM", - "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-0KvmMœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" + "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-0KvmMœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" } ], "nodes": [ @@ -1876,6 +1876,7 @@ "group_outputs": false, "method": "build_structured_dataframe", "name": "dataframe_output", + "selected": null, "tool_mode": true, "types": [ "DataFrame" @@ -2147,7 +2148,7 @@ "description": "Ingest and search documents in Astra DB", "display_name": "Astra DB", "documentation": "https://docs.datastax.com/en/langflow/astra-components.html", - "edited": true, + "edited": false, "field_order": [ "token", "environment", @@ -2176,7 +2177,7 @@ "icon": "AstraDB", "legacy": false, "metadata": { - "code_hash": "a48033d3ac1b", + "code_hash": "23fbe9daca09", "dependencies": { "dependencies": [ { @@ -2198,7 +2199,7 @@ ], "total_dependencies": 4 }, - "module": "custom_components.astra_db" + "module": "langflow.components.datastax.astradb_vectorstore.AstraDBVectorStoreComponent" }, "minimized": false, "output_types": [], @@ -2208,11 +2209,8 @@ "cache": true, "display_name": "Search Results", "group_outputs": false, - "hidden": null, "method": "search_documents", "name": "search_results", - "options": null, - "required_inputs": null, "selected": "Data", "tool_mode": true, "types": [ @@ -2225,11 +2223,8 @@ "cache": true, "display_name": "DataFrame", "group_outputs": false, - "hidden": null, "method": "as_dataframe", "name": "dataframe", - "options": null, - "required_inputs": null, "selected": "DataFrame", "tool_mode": true, "types": [ @@ -2245,8 +2240,6 @@ "hidden": true, "method": "as_vector_store", "name": "vectorstoreconnection", - "options": null, - "required_inputs": null, "selected": "VectorStore", "tool_mode": true, "types": [ @@ -2351,7 +2344,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=True,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" + "value": "import re\nfrom collections import defaultdict\nfrom dataclasses import asdict, dataclass, field\n\nfrom astrapy import DataAPIClient, Database\nfrom astrapy.data.info.reranking import RerankServiceOptions\nfrom astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions\nfrom langchain_astradb import AstraDBVectorStore, VectorServiceOptions\nfrom langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment\nfrom langchain_core.documents import Document\n\nfrom langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom langflow.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom langflow.helpers.data import docs_to_data\nfrom langflow.inputs.inputs import FloatInput, NestedDictInput\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n QueryInput,\n SecretStrInput,\n StrInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.serialization import serialize\nfrom langflow.utils.version import get_version_info\n\n\n@vector_store_connection\nclass AstraDBVectorStoreComponent(LCVectorStoreComponent):\n display_name: str = \"Astra DB\"\n description: str = \"Ingest and search documents in Astra DB\"\n documentation: str = \"https://docs.datastax.com/en/langflow/astra-components.html\"\n name = \"AstraDB\"\n icon: str = \"AstraDB\"\n\n _cached_vector_store: AstraDBVectorStore | None = None\n\n @dataclass\n class NewDatabaseInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_database\",\n \"description\": \"Please allow several minutes for creation to complete.\",\n \"display_name\": \"Create new database\",\n \"field_order\": [\"01_new_database_name\", \"02_cloud_provider\", \"03_region\"],\n \"template\": {\n \"01_new_database_name\": StrInput(\n name=\"new_database_name\",\n display_name=\"Name\",\n info=\"Name of the new database to create in Astra DB.\",\n required=True,\n ),\n \"02_cloud_provider\": DropdownInput(\n name=\"cloud_provider\",\n display_name=\"Cloud provider\",\n info=\"Cloud provider for the new database.\",\n options=[],\n required=True,\n real_time_refresh=True,\n ),\n \"03_region\": DropdownInput(\n name=\"region\",\n display_name=\"Region\",\n info=\"Region for the new database.\",\n options=[],\n required=True,\n ),\n },\n },\n }\n }\n )\n\n @dataclass\n class NewCollectionInput:\n functionality: str = \"create\"\n fields: dict[str, dict] = field(\n default_factory=lambda: {\n \"data\": {\n \"node\": {\n \"name\": \"create_collection\",\n \"description\": \"Please allow several seconds for creation to complete.\",\n \"display_name\": \"Create new collection\",\n \"field_order\": [\n \"01_new_collection_name\",\n \"02_embedding_generation_provider\",\n \"03_embedding_generation_model\",\n \"04_dimension\",\n ],\n \"template\": {\n \"01_new_collection_name\": StrInput(\n name=\"new_collection_name\",\n display_name=\"Name\",\n info=\"Name of the new collection to create in Astra DB.\",\n required=True,\n ),\n \"02_embedding_generation_provider\": DropdownInput(\n name=\"embedding_generation_provider\",\n display_name=\"Embedding generation method\",\n info=\"Provider to use for generating embeddings.\",\n helper_text=(\n \"To create collections with more embedding provider options, go to \"\n 'your database in Astra DB'\n ),\n real_time_refresh=True,\n required=True,\n options=[],\n ),\n \"03_embedding_generation_model\": DropdownInput(\n name=\"embedding_generation_model\",\n display_name=\"Embedding model\",\n info=\"Model to use for generating embeddings.\",\n real_time_refresh=True,\n options=[],\n ),\n \"04_dimension\": IntInput(\n name=\"dimension\",\n display_name=\"Dimensions\",\n info=\"Dimensions of the embeddings to generate.\",\n value=None,\n ),\n },\n },\n }\n }\n )\n\n inputs = [\n SecretStrInput(\n name=\"token\",\n display_name=\"Astra DB Application Token\",\n info=\"Authentication token for accessing Astra DB.\",\n value=\"ASTRA_DB_APPLICATION_TOKEN\",\n required=True,\n real_time_refresh=True,\n input_types=[],\n ),\n DropdownInput(\n name=\"environment\",\n display_name=\"Environment\",\n info=\"The environment for the Astra DB API Endpoint.\",\n options=[\"prod\", \"test\", \"dev\"],\n value=\"prod\",\n advanced=True,\n real_time_refresh=True,\n combobox=True,\n ),\n DropdownInput(\n name=\"database_name\",\n display_name=\"Database\",\n info=\"The Database name for the Astra DB instance.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewDatabaseInput()),\n combobox=True,\n ),\n DropdownInput(\n name=\"api_endpoint\",\n display_name=\"Astra DB API Endpoint\",\n info=\"The API Endpoint for the Astra DB instance. Supercedes database selection.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keyspace\",\n display_name=\"Keyspace\",\n info=\"Optional keyspace within Astra DB to use for the collection.\",\n advanced=True,\n options=[],\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"collection_name\",\n display_name=\"Collection\",\n info=\"The name of the collection within Astra DB where the vectors will be stored.\",\n required=True,\n refresh_button=True,\n real_time_refresh=True,\n dialog_inputs=asdict(NewCollectionInput()),\n combobox=True,\n show=False,\n ),\n HandleInput(\n name=\"embedding_model\",\n display_name=\"Embedding Model\",\n input_types=[\"Embeddings\"],\n info=\"Specify the Embedding Model. Not required for Astra Vectorize collections.\",\n required=False,\n show=False,\n ),\n *LCVectorStoreComponent.inputs,\n DropdownInput(\n name=\"search_method\",\n display_name=\"Search Method\",\n info=(\n \"Determine how your content is matched: Vector finds semantic similarity, \"\n \"and Hybrid Search (suggested) combines both approaches \"\n \"with a reranker.\"\n ),\n options=[\"Hybrid Search\", \"Vector Search\"], # TODO: Restore Lexical Search?\n options_metadata=[{\"icon\": \"SearchHybrid\"}, {\"icon\": \"SearchVector\"}],\n value=\"Vector Search\",\n advanced=True,\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"reranker\",\n display_name=\"Reranker\",\n info=\"Post-retrieval model that re-scores results for optimal relevance ranking.\",\n show=False,\n toggle=True,\n ),\n QueryInput(\n name=\"lexical_terms\",\n display_name=\"Lexical Terms\",\n info=\"Add additional terms/keywords to augment search precision.\",\n placeholder=\"Enter terms to search...\",\n separator=\" \",\n show=False,\n value=\"\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Number of Search Results\",\n info=\"Number of search results to return.\",\n advanced=True,\n value=4,\n ),\n DropdownInput(\n name=\"search_type\",\n display_name=\"Search Type\",\n info=\"Search type to use\",\n options=[\"Similarity\", \"Similarity with score threshold\", \"MMR (Max Marginal Relevance)\"],\n value=\"Similarity\",\n advanced=True,\n ),\n FloatInput(\n name=\"search_score_threshold\",\n display_name=\"Search Score Threshold\",\n info=\"Minimum similarity score threshold for search results. \"\n \"(when using 'Similarity with score threshold')\",\n value=0,\n advanced=True,\n ),\n NestedDictInput(\n name=\"advanced_search_filter\",\n display_name=\"Search Metadata Filter\",\n info=\"Optional dictionary of filters to apply to the search query.\",\n advanced=True,\n ),\n BoolInput(\n name=\"autodetect_collection\",\n display_name=\"Autodetect Collection\",\n info=\"Boolean flag to determine whether to autodetect the collection.\",\n advanced=True,\n value=True,\n ),\n StrInput(\n name=\"content_field\",\n display_name=\"Content Field\",\n info=\"Field to use as the text content field for the vector store.\",\n advanced=True,\n ),\n StrInput(\n name=\"deletion_field\",\n display_name=\"Deletion Based On Field\",\n info=\"When this parameter is provided, documents in the target collection with \"\n \"metadata field values matching the input metadata field value will be deleted \"\n \"before new data is loaded.\",\n advanced=True,\n ),\n BoolInput(\n name=\"ignore_invalid_documents\",\n display_name=\"Ignore Invalid Documents\",\n info=\"Boolean flag to determine whether to ignore invalid documents at runtime.\",\n advanced=True,\n ),\n NestedDictInput(\n name=\"astradb_vectorstore_kwargs\",\n display_name=\"AstraDBVectorStore Parameters\",\n info=\"Optional dictionary of additional parameters for the AstraDBVectorStore.\",\n advanced=True,\n ),\n ]\n\n @classmethod\n def map_cloud_providers(cls):\n # TODO: Programmatically fetch the regions for each cloud provider\n return {\n \"dev\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-west-2\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\", \"europe-west4\"],\n },\n },\n \"test\": {\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-central1\"],\n },\n },\n \"prod\": {\n \"Amazon Web Services\": {\n \"id\": \"aws\",\n \"regions\": [\"us-east-2\", \"ap-south-1\", \"eu-west-1\"],\n },\n \"Google Cloud Platform\": {\n \"id\": \"gcp\",\n \"regions\": [\"us-east1\"],\n },\n \"Microsoft Azure\": {\n \"id\": \"azure\",\n \"regions\": [\"westus3\"],\n },\n },\n }\n\n @classmethod\n def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):\n try:\n # Get the admin object\n client = DataAPIClient(environment=environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(api_endpoint, token=token)\n\n # Get the list of embedding providers\n embedding_providers = db_admin.find_embedding_providers()\n\n vectorize_providers_mapping = {}\n # Map the provider display name to the provider key and models\n for provider_key, provider_data in embedding_providers.embedding_providers.items():\n # Get the provider display name and models\n display_name = provider_data.display_name\n models = [model.name for model in provider_data.models]\n\n # Build our mapping\n vectorize_providers_mapping[display_name] = [provider_key, models]\n\n # Sort the resulting dictionary\n return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))\n except Exception as _: # noqa: BLE001\n return {}\n\n @classmethod\n async def create_database_api(\n cls,\n new_database_name: str,\n cloud_provider: str,\n region: str,\n token: str,\n environment: str | None = None,\n keyspace: str | None = None,\n ):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the environment, set to prod if null like\n my_env = environment or \"prod\"\n\n # Raise a value error if name isn't provided\n if not new_database_name:\n msg = \"Database name is required to create a new database.\"\n raise ValueError(msg)\n\n # Call the create database function\n return await admin_client.async_create_database(\n name=new_database_name,\n cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider][\"id\"],\n region=region,\n keyspace=keyspace,\n wait_until_active=False,\n )\n\n @classmethod\n async def create_collection_api(\n cls,\n new_collection_name: str,\n token: str,\n api_endpoint: str,\n environment: str | None = None,\n keyspace: str | None = None,\n dimension: int | None = None,\n embedding_generation_provider: str | None = None,\n embedding_generation_model: str | None = None,\n reranker: str | None = None,\n ):\n # Build vectorize options, if needed\n vectorize_options = None\n if not dimension:\n providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)\n vectorize_options = VectorServiceOptions(\n provider=providers.get(embedding_generation_provider, [None, []])[0],\n model_name=embedding_generation_model,\n )\n\n # Raise a value error if name isn't provided\n if not new_collection_name:\n msg = \"Collection name is required to create a new collection.\"\n raise ValueError(msg)\n\n # Define the base arguments being passed to the create collection function\n base_args = {\n \"collection_name\": new_collection_name,\n \"token\": token,\n \"api_endpoint\": api_endpoint,\n \"keyspace\": keyspace,\n \"environment\": environment,\n \"embedding_dimension\": dimension,\n \"collection_vector_service_options\": vectorize_options,\n }\n\n # Add optional arguments if the reranker is set\n if reranker:\n # Split the reranker field into a provider a model name\n provider, _ = reranker.split(\"/\")\n base_args[\"collection_rerank\"] = CollectionRerankOptions(\n service=RerankServiceOptions(provider=provider, model_name=reranker),\n )\n base_args[\"collection_lexical\"] = CollectionLexicalOptions(analyzer=\"STANDARD\")\n\n _AstraDBCollectionEnvironment(**base_args)\n\n @classmethod\n def get_database_list_static(cls, token: str, environment: str | None = None):\n client = DataAPIClient(environment=environment)\n\n # Get the admin object\n admin_client = client.get_admin(token=token)\n\n # Get the list of databases\n db_list = admin_client.list_databases()\n\n # Generate the api endpoint for each database\n db_info_dict = {}\n for db in db_list:\n try:\n # Get the API endpoint for the database\n api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]\n\n # Get the number of collections\n try:\n # Get the number of collections in the database\n num_collections = len(\n client.get_database(\n api_endpoints[0],\n token=token,\n ).list_collection_names()\n )\n except Exception: # noqa: BLE001\n if db.status != \"PENDING\":\n continue\n num_collections = 0\n\n # Add the database to the dictionary\n db_info_dict[db.name] = {\n \"api_endpoints\": api_endpoints,\n \"keyspaces\": db.keyspaces,\n \"collections\": num_collections,\n \"status\": db.status if db.status != \"ACTIVE\" else None,\n \"org_id\": db.org_id if db.org_id else None,\n }\n except Exception: # noqa: BLE001, S110\n pass\n\n return db_info_dict\n\n def get_database_list(self):\n return self.get_database_list_static(\n token=self.token,\n environment=self.environment,\n )\n\n @classmethod\n def get_api_endpoint_static(\n cls,\n token: str,\n environment: str | None = None,\n api_endpoint: str | None = None,\n database_name: str | None = None,\n ):\n # If the api_endpoint is set, return it\n if api_endpoint:\n return api_endpoint\n\n # Check if the database_name is like a url\n if database_name and database_name.startswith(\"https://\"):\n return database_name\n\n # If the database is not set, nothing we can do.\n if not database_name:\n return None\n\n # Grab the database object\n db = cls.get_database_list_static(token=token, environment=environment).get(database_name)\n if not db:\n return None\n\n # Otherwise, get the URL from the database list\n endpoints = db.get(\"api_endpoints\") or []\n return endpoints[0] if endpoints else None\n\n def get_api_endpoint(self):\n return self.get_api_endpoint_static(\n token=self.token,\n environment=self.environment,\n api_endpoint=self.api_endpoint,\n database_name=self.database_name,\n )\n\n @classmethod\n def get_database_id_static(cls, api_endpoint: str) -> str | None:\n # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters\n uuid_pattern = r\"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\"\n match = re.search(uuid_pattern, api_endpoint)\n\n return match.group(0) if match else None\n\n def get_database_id(self):\n return self.get_database_id_static(api_endpoint=self.get_api_endpoint())\n\n def get_keyspace(self):\n keyspace = self.keyspace\n\n if keyspace:\n return keyspace.strip()\n\n return \"default_keyspace\"\n\n def get_database_object(self, api_endpoint: str | None = None):\n try:\n client = DataAPIClient(environment=self.environment)\n\n return client.get_database(\n api_endpoint or self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n except Exception as e:\n msg = f\"Error fetching database object: {e}\"\n raise ValueError(msg) from e\n\n def collection_data(self, collection_name: str, database: Database | None = None):\n try:\n if not database:\n client = DataAPIClient(environment=self.environment)\n\n database = client.get_database(\n self.get_api_endpoint(),\n token=self.token,\n keyspace=self.get_keyspace(),\n )\n\n collection = database.get_collection(collection_name)\n\n return collection.estimated_document_count()\n except Exception as e: # noqa: BLE001\n self.log(f\"Error checking collection data: {e}\")\n\n return None\n\n def _initialize_database_options(self):\n try:\n return [\n {\n \"name\": name,\n \"status\": info[\"status\"],\n \"collections\": info[\"collections\"],\n \"api_endpoints\": info[\"api_endpoints\"],\n \"keyspaces\": info[\"keyspaces\"],\n \"org_id\": info[\"org_id\"],\n }\n for name, info in self.get_database_list().items()\n ]\n except Exception as e:\n msg = f\"Error fetching database options: {e}\"\n raise ValueError(msg) from e\n\n @classmethod\n def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:\n # Get the provider name from the collection\n provider_name = provider_name or (\n collection.definition.vector.service.provider\n if (\n collection\n and collection.definition\n and collection.definition.vector\n and collection.definition.vector.service\n )\n else None\n )\n\n # If there is no provider, use the vector store icon\n if not provider_name or provider_name.lower() == \"bring your own\":\n return \"vectorstores\"\n\n # Map provider casings\n case_map = {\n \"nvidia\": \"NVIDIA\",\n \"openai\": \"OpenAI\",\n \"amazon bedrock\": \"AmazonBedrockEmbeddings\",\n \"azure openai\": \"AzureOpenAiEmbeddings\",\n \"cohere\": \"Cohere\",\n \"jina ai\": \"JinaAI\",\n \"mistral ai\": \"MistralAI\",\n \"upstage\": \"Upstage\",\n \"voyage ai\": \"VoyageAI\",\n }\n\n # Adjust the casing on some like nvidia\n return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()\n\n def _initialize_collection_options(self, api_endpoint: str | None = None):\n # Nothing to generate if we don't have an API endpoint yet\n api_endpoint = api_endpoint or self.get_api_endpoint()\n if not api_endpoint:\n return []\n\n # Retrieve the database object\n database = self.get_database_object(api_endpoint=api_endpoint)\n\n # Get the list of collections\n collection_list = database.list_collections(keyspace=self.get_keyspace())\n\n # Return the list of collections and metadata associated\n return [\n {\n \"name\": col.name,\n \"records\": self.collection_data(collection_name=col.name, database=database),\n \"provider\": (\n col.definition.vector.service.provider\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n \"icon\": self.get_provider_icon(collection=col),\n \"model\": (\n col.definition.vector.service.model_name\n if col.definition.vector and col.definition.vector.service\n else None\n ),\n }\n for col in collection_list\n ]\n\n def reset_provider_options(self, build_config: dict) -> dict:\n \"\"\"Reset provider options and related configurations in the build_config dictionary.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get vectorize providers\n vectorize_providers_api = self.get_vectorize_providers(\n token=self.token,\n environment=self.environment,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n )\n\n # Create a new dictionary with \"Bring your own\" first\n vectorize_providers: dict[str, list[list[str]]] = {\"Bring your own\": [[], []]}\n\n # Add the remaining items (only Nvidia) from the original dictionary\n vectorize_providers.update(\n {\n k: v\n for k, v in vectorize_providers_api.items()\n if k.lower() in [\"nvidia\"] # TODO: Eventually support more\n }\n )\n\n # Set provider options\n provider_field = \"02_embedding_generation_provider\"\n template[provider_field][\"options\"] = list(vectorize_providers.keys())\n\n # Add metadata for each provider option\n template[provider_field][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field][\"options\"]\n ]\n\n # Get selected embedding provider\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure embedding model field\n model_field = \"03_embedding_generation_model\"\n template[model_field].update(\n {\n \"options\": vectorize_providers.get(embedding_provider, [[], []])[1],\n \"placeholder\": \"Bring your own\" if is_bring_your_own else None,\n \"readonly\": is_bring_your_own,\n \"required\": not is_bring_your_own,\n \"value\": None,\n }\n )\n\n # If this is a bring your own, set dimensions to 0\n return self.reset_dimension_field(build_config)\n\n def reset_dimension_field(self, build_config: dict) -> dict:\n \"\"\"Reset dimension field options based on provided configuration.\"\"\"\n # Extract template path for cleaner access\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n\n # Get selected embedding model\n provider_field = \"02_embedding_generation_provider\"\n embedding_provider = template[provider_field][\"value\"]\n is_bring_your_own = embedding_provider and embedding_provider == \"Bring your own\"\n\n # Configure dimension field\n dimension_field = \"04_dimension\"\n dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out\n template[dimension_field].update(\n {\n \"placeholder\": dimension_value,\n \"value\": dimension_value,\n \"readonly\": not is_bring_your_own,\n \"required\": is_bring_your_own,\n }\n )\n\n return build_config\n\n def reset_collection_list(self, build_config: dict) -> dict:\n \"\"\"Reset collection list options based on provided configuration.\"\"\"\n # Get collection options\n collection_options = self._initialize_collection_options(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n # Update collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update(\n {\n \"options\": [col[\"name\"] for col in collection_options],\n \"options_metadata\": [{k: v for k, v in col.items() if k != \"name\"} for col in collection_options],\n }\n )\n\n # Reset selected collection if not in options\n if collection_config[\"value\"] not in collection_config[\"options\"]:\n collection_config[\"value\"] = \"\"\n\n # Set advanced status based on database selection\n collection_config[\"show\"] = bool(build_config[\"database_name\"][\"value\"])\n\n return build_config\n\n def reset_database_list(self, build_config: dict) -> dict:\n \"\"\"Reset database list options and related configurations.\"\"\"\n # Get database options\n database_options = self._initialize_database_options()\n\n # Update cloud provider options\n env = self.environment\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_cloud_provider\"][\"options\"] = list(self.map_cloud_providers()[env].keys())\n\n # Update database configuration\n database_config = build_config[\"database_name\"]\n database_config.update(\n {\n \"options\": [db[\"name\"] for db in database_options],\n \"options_metadata\": [{k: v for k, v in db.items() if k != \"name\"} for db in database_options],\n }\n )\n\n # Reset selections if value not in options\n if database_config[\"value\"] not in database_config[\"options\"]:\n database_config[\"value\"] = \"\"\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n build_config[\"collection_name\"][\"show\"] = False\n\n # Set advanced status based on token presence\n database_config[\"show\"] = bool(build_config[\"token\"][\"value\"])\n\n return build_config\n\n def reset_build_config(self, build_config: dict) -> dict:\n \"\"\"Reset all build configuration options to default empty state.\"\"\"\n # Reset database configuration\n database_config = build_config[\"database_name\"]\n database_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n build_config[\"api_endpoint\"][\"options\"] = []\n build_config[\"api_endpoint\"][\"value\"] = \"\"\n\n # Reset collection configuration\n collection_config = build_config[\"collection_name\"]\n collection_config.update({\"options\": [], \"options_metadata\": [], \"value\": \"\", \"show\": False})\n\n return build_config\n\n def _handle_hybrid_search_options(self, build_config: dict) -> dict:\n \"\"\"Set hybrid search options in the build configuration.\"\"\"\n # Detect what hybrid options are available\n # Get the admin object\n client = DataAPIClient(environment=self.environment)\n admin_client = client.get_admin()\n db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)\n\n # We will try to get the reranking providers to see if its hybrid emabled\n try:\n providers = db_admin.find_reranking_providers()\n build_config[\"reranker\"][\"options\"] = [\n model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models\n ]\n build_config[\"reranker\"][\"options_metadata\"] = [\n {\"icon\": self.get_provider_icon(provider_name=model.name.split(\"/\")[0])}\n for provider in providers.reranking_providers.values()\n for model in provider.models\n ]\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Set the default search field to hybrid search\n build_config[\"search_method\"][\"show\"] = True\n build_config[\"search_method\"][\"options\"] = [\"Hybrid Search\", \"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Hybrid Search\"\n except Exception as _: # noqa: BLE001\n build_config[\"reranker\"][\"options\"] = []\n build_config[\"reranker\"][\"options_metadata\"] = []\n\n # Set the default search field to vector search\n build_config[\"search_method\"][\"show\"] = False\n build_config[\"search_method\"][\"options\"] = [\"Vector Search\"]\n build_config[\"search_method\"][\"value\"] = \"Vector Search\"\n\n return build_config\n\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Update build configuration based on field name and value.\"\"\"\n # Early return if no token provided\n if not self.token:\n return self.reset_build_config(build_config)\n\n # Database creation callback\n if field_name == \"database_name\" and isinstance(field_value, dict):\n if \"01_new_database_name\" in field_value:\n await self._create_new_database(build_config, field_value)\n return self.reset_collection_list(build_config)\n return self._update_cloud_regions(build_config, field_value)\n\n # Collection creation callback\n if field_name == \"collection_name\" and isinstance(field_value, dict):\n # Case 1: New collection creation\n if \"01_new_collection_name\" in field_value:\n await self._create_new_collection(build_config, field_value)\n return build_config\n\n # Case 2: Update embedding provider options\n if \"02_embedding_generation_provider\" in field_value:\n return self.reset_provider_options(build_config)\n\n # Case 3: Update dimension field\n if \"03_embedding_generation_model\" in field_value:\n return self.reset_dimension_field(build_config)\n\n # Initial execution or token/environment change\n first_run = field_name == \"collection_name\" and not field_value and not build_config[\"database_name\"][\"options\"]\n if first_run or field_name in {\"token\", \"environment\"}:\n return self.reset_database_list(build_config)\n\n # Database selection change\n if field_name == \"database_name\" and not isinstance(field_value, dict):\n return self._handle_database_selection(build_config, field_value)\n\n # Keyspace selection change\n if field_name == \"keyspace\":\n return self.reset_collection_list(build_config)\n\n # Collection selection change\n if field_name == \"collection_name\" and not isinstance(field_value, dict):\n return self._handle_collection_selection(build_config, field_value)\n\n # Search method selection change\n if field_name == \"search_method\":\n is_vector_search = field_value == \"Vector Search\"\n is_autodetect = build_config[\"autodetect_collection\"][\"value\"]\n\n # Configure lexical terms (same for both cases)\n build_config[\"lexical_terms\"][\"show\"] = not is_vector_search\n build_config[\"lexical_terms\"][\"value\"] = \"\" if is_vector_search else build_config[\"lexical_terms\"][\"value\"]\n\n # Disable reranker disabling if hybrid search is selected\n build_config[\"reranker\"][\"show\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_disable\"] = not is_vector_search\n build_config[\"reranker\"][\"toggle_value\"] = True\n build_config[\"reranker\"][\"value\"] = build_config[\"reranker\"][\"options\"][0]\n\n # Toggle search type and score threshold based on search method\n build_config[\"search_type\"][\"show\"] = is_vector_search\n build_config[\"search_score_threshold\"][\"show\"] = is_vector_search\n\n # Make sure the search_type is set to \"Similarity\"\n if not is_vector_search or is_autodetect:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n return build_config\n\n async def _create_new_database(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new database and update build config options.\"\"\"\n try:\n await self.create_database_api(\n new_database_name=field_value[\"01_new_database_name\"],\n token=self.token,\n keyspace=self.get_keyspace(),\n environment=self.environment,\n cloud_provider=field_value[\"02_cloud_provider\"],\n region=field_value[\"03_region\"],\n )\n except Exception as e:\n msg = f\"Error creating database: {e}\"\n raise ValueError(msg) from e\n\n build_config[\"database_name\"][\"options\"].append(field_value[\"01_new_database_name\"])\n build_config[\"database_name\"][\"options_metadata\"].append(\n {\n \"status\": \"PENDING\",\n \"collections\": 0,\n \"api_endpoints\": [],\n \"keyspaces\": [self.get_keyspace()],\n \"org_id\": None,\n }\n )\n\n def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:\n \"\"\"Update cloud provider regions in build config.\"\"\"\n env = self.environment\n cloud_provider = field_value[\"02_cloud_provider\"]\n\n # Update the region options based on the selected cloud provider\n template = build_config[\"database_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"03_region\"][\"options\"] = self.map_cloud_providers()[env][cloud_provider][\"regions\"]\n\n # Reset the the 03_region value if it's not in the new options\n if template[\"03_region\"][\"value\"] not in template[\"03_region\"][\"options\"]:\n template[\"03_region\"][\"value\"] = None\n\n return build_config\n\n async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:\n \"\"\"Create a new collection and update build config options.\"\"\"\n embedding_provider = field_value.get(\"02_embedding_generation_provider\")\n try:\n await self.create_collection_api(\n new_collection_name=field_value[\"01_new_collection_name\"],\n token=self.token,\n api_endpoint=build_config[\"api_endpoint\"][\"value\"],\n environment=self.environment,\n keyspace=self.get_keyspace(),\n dimension=field_value.get(\"04_dimension\") if embedding_provider == \"Bring your own\" else None,\n embedding_generation_provider=embedding_provider,\n embedding_generation_model=field_value.get(\"03_embedding_generation_model\"),\n reranker=self.reranker,\n )\n except Exception as e:\n msg = f\"Error creating collection: {e}\"\n raise ValueError(msg) from e\n\n provider = embedding_provider.lower() if embedding_provider and embedding_provider != \"Bring your own\" else None\n build_config[\"collection_name\"].update(\n {\n \"value\": field_value[\"01_new_collection_name\"],\n \"options\": build_config[\"collection_name\"][\"options\"] + [field_value[\"01_new_collection_name\"]],\n }\n )\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": provider,\n \"icon\": self.get_provider_icon(provider_name=provider),\n \"model\": field_value.get(\"03_embedding_generation_model\"),\n }\n )\n\n # Make sure we always show the reranker options if the collection is hybrid enabled\n # And right now they always are\n build_config[\"lexical_terms\"][\"show\"] = True\n\n def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle database selection and update related configurations.\"\"\"\n build_config = self.reset_database_list(build_config)\n\n # Reset collection list if database selection changes\n if field_value not in build_config[\"database_name\"][\"options\"]:\n build_config[\"database_name\"][\"value\"] = \"\"\n return build_config\n\n # Get the api endpoint for the selected database\n index = build_config[\"database_name\"][\"options\"].index(field_value)\n build_config[\"api_endpoint\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ]\n build_config[\"api_endpoint\"][\"value\"] = build_config[\"database_name\"][\"options_metadata\"][index][\n \"api_endpoints\"\n ][0]\n\n # Get the org_id for the selected database\n org_id = build_config[\"database_name\"][\"options_metadata\"][index][\"org_id\"]\n if not org_id:\n return build_config\n\n # Update the list of keyspaces based on the db info\n build_config[\"keyspace\"][\"options\"] = build_config[\"database_name\"][\"options_metadata\"][index][\"keyspaces\"]\n build_config[\"keyspace\"][\"value\"] = (\n build_config[\"keyspace\"][\"options\"] and build_config[\"keyspace\"][\"options\"][0]\n if build_config[\"keyspace\"][\"value\"] not in build_config[\"keyspace\"][\"options\"]\n else build_config[\"keyspace\"][\"value\"]\n )\n\n # Get the database id for the selected database\n db_id = self.get_database_id_static(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n keyspace = self.get_keyspace()\n\n # Update the helper text for the embedding provider field\n template = build_config[\"collection_name\"][\"dialog_inputs\"][\"fields\"][\"data\"][\"node\"][\"template\"]\n template[\"02_embedding_generation_provider\"][\"helper_text\"] = (\n \"To create collections with more embedding provider options, go to \"\n f''\n \"your database in Astra DB.\"\n )\n\n # Reset provider options\n build_config = self.reset_provider_options(build_config)\n\n # Handle hybrid search options\n build_config = self._handle_hybrid_search_options(build_config)\n\n return self.reset_collection_list(build_config)\n\n def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:\n \"\"\"Handle collection selection and update embedding options.\"\"\"\n build_config[\"autodetect_collection\"][\"value\"] = True\n build_config = self.reset_collection_list(build_config)\n\n # Reset embedding model if collection selection changes\n if field_value and field_value not in build_config[\"collection_name\"][\"options\"]:\n build_config[\"collection_name\"][\"options\"].append(field_value)\n build_config[\"collection_name\"][\"options_metadata\"].append(\n {\n \"records\": 0,\n \"provider\": None,\n \"icon\": \"vectorstores\",\n \"model\": None,\n }\n )\n build_config[\"autodetect_collection\"][\"value\"] = False\n\n if not field_value:\n return build_config\n\n # Get the selected collection index\n index = build_config[\"collection_name\"][\"options\"].index(field_value)\n\n # Set the provider of the selected collection\n provider = build_config[\"collection_name\"][\"options_metadata\"][index][\"provider\"]\n build_config[\"embedding_model\"][\"show\"] = not bool(provider)\n build_config[\"embedding_model\"][\"required\"] = not bool(provider)\n\n # Grab the collection object\n database = self.get_database_object(api_endpoint=build_config[\"api_endpoint\"][\"value\"])\n collection = database.get_collection(\n name=field_value,\n keyspace=build_config[\"keyspace\"][\"value\"],\n )\n\n # Check if hybrid and lexical are enabled\n col_options = collection.options()\n hyb_enabled = col_options.rerank and col_options.rerank.enabled\n lex_enabled = col_options.lexical and col_options.lexical.enabled\n user_hyb_enabled = build_config[\"search_method\"][\"value\"] == \"Hybrid Search\"\n\n # Reranker visible when both the collection supports it and the user selected Hybrid\n hybrid_active = bool(hyb_enabled and user_hyb_enabled)\n build_config[\"reranker\"][\"show\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_value\"] = hybrid_active\n build_config[\"reranker\"][\"toggle_disable\"] = False # allow user to toggle if visible\n\n # If hybrid is active, lock search_type to \"Similarity\"\n if hybrid_active:\n build_config[\"search_type\"][\"value\"] = \"Similarity\"\n\n # Show the lexical terms option only if the collection enables lexical search\n build_config[\"lexical_terms\"][\"show\"] = bool(lex_enabled)\n\n return build_config\n\n @check_cached_vector_store\n def build_vector_store(self):\n try:\n from langchain_astradb import AstraDBVectorStore\n except ImportError as e:\n msg = (\n \"Could not import langchain Astra DB integration package. \"\n \"Please install it with `pip install langchain-astradb`.\"\n )\n raise ImportError(msg) from e\n\n # Get the embedding model and additional params\n embedding_params = {\"embedding\": self.embedding_model} if self.embedding_model else {}\n\n # Get the additional parameters\n additional_params = self.astradb_vectorstore_kwargs or {}\n\n # Get Langflow version and platform information\n __version__ = get_version_info()[\"version\"]\n langflow_prefix = \"\"\n # if os.getenv(\"AWS_EXECUTION_ENV\") == \"AWS_ECS_FARGATE\": # TODO: More precise way of detecting\n # langflow_prefix = \"ds-\"\n\n # Get the database object\n database = self.get_database_object()\n autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection\n\n # Bundle up the auto-detect parameters\n autodetect_params = {\n \"autodetect_collection\": autodetect,\n \"content_field\": (\n self.content_field\n if self.content_field and embedding_params\n else (\n \"page_content\"\n if embedding_params\n and self.collection_data(collection_name=self.collection_name, database=database) == 0\n else None\n )\n ),\n \"ignore_invalid_documents\": self.ignore_invalid_documents,\n }\n\n # Choose HybridSearchMode based on the selected param\n hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == \"Hybrid Search\" else HybridSearchMode.OFF\n\n # Attempt to build the Vector Store object\n try:\n vector_store = AstraDBVectorStore(\n # Astra DB Authentication Parameters\n token=self.token,\n api_endpoint=database.api_endpoint,\n namespace=database.keyspace,\n collection_name=self.collection_name,\n environment=self.environment,\n # Hybrid Search Parameters\n hybrid_search=hybrid_search_mode,\n # Astra DB Usage Tracking Parameters\n ext_callers=[(f\"{langflow_prefix}langflow\", __version__)],\n # Astra DB Vector Store Parameters\n **autodetect_params,\n **embedding_params,\n **additional_params,\n )\n except Exception as e:\n msg = f\"Error initializing AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n # Add documents to the vector store\n self._add_documents_to_vector_store(vector_store)\n\n return vector_store\n\n def _add_documents_to_vector_store(self, vector_store) -> None:\n self.ingest_data = self._prepare_ingest_data()\n\n documents = []\n for _input in self.ingest_data or []:\n if isinstance(_input, Data):\n documents.append(_input.to_lc_document())\n else:\n msg = \"Vector Store Inputs must be Data objects.\"\n raise TypeError(msg)\n\n documents = [\n Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents\n ]\n\n if documents and self.deletion_field:\n self.log(f\"Deleting documents where {self.deletion_field}\")\n try:\n database = self.get_database_object()\n collection = database.get_collection(self.collection_name, keyspace=database.keyspace)\n delete_values = list({doc.metadata[self.deletion_field] for doc in documents})\n self.log(f\"Deleting documents where {self.deletion_field} matches {delete_values}.\")\n collection.delete_many({f\"metadata.{self.deletion_field}\": {\"$in\": delete_values}})\n except Exception as e:\n msg = f\"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}\"\n raise ValueError(msg) from e\n\n if documents:\n self.log(f\"Adding {len(documents)} documents to the Vector Store.\")\n try:\n vector_store.add_documents(documents)\n except Exception as e:\n msg = f\"Error adding documents to AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n else:\n self.log(\"No documents to add to the Vector Store.\")\n\n def _map_search_type(self) -> str:\n search_type_mapping = {\n \"Similarity with score threshold\": \"similarity_score_threshold\",\n \"MMR (Max Marginal Relevance)\": \"mmr\",\n }\n\n return search_type_mapping.get(self.search_type, \"similarity\")\n\n def _build_search_args(self):\n # Clean up the search query\n query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None\n lexical_terms = self.lexical_terms or None\n\n # Check if we have a search query, and if so set the args\n if query:\n args = {\n \"query\": query,\n \"search_type\": self._map_search_type(),\n \"k\": self.number_of_results,\n \"score_threshold\": self.search_score_threshold,\n \"lexical_query\": lexical_terms,\n }\n elif self.advanced_search_filter:\n args = {\n \"n\": self.number_of_results,\n }\n else:\n return {}\n\n filter_arg = self.advanced_search_filter or {}\n if filter_arg:\n args[\"filter\"] = filter_arg\n\n return args\n\n def search_documents(self, vector_store=None) -> list[Data]:\n vector_store = vector_store or self.build_vector_store()\n\n self.log(f\"Search input: {self.search_query}\")\n self.log(f\"Search type: {self.search_type}\")\n self.log(f\"Number of results: {self.number_of_results}\")\n self.log(f\"store.hybrid_search: {vector_store.hybrid_search}\")\n self.log(f\"Lexical terms: {self.lexical_terms}\")\n self.log(f\"Reranker: {self.reranker}\")\n\n try:\n search_args = self._build_search_args()\n except Exception as e:\n msg = f\"Error in AstraDBVectorStore._build_search_args: {e}\"\n raise ValueError(msg) from e\n\n if not search_args:\n self.log(\"No search input or filters provided. Skipping search.\")\n return []\n\n docs = []\n search_method = \"search\" if \"query\" in search_args else \"metadata_search\"\n\n try:\n self.log(f\"Calling vector_store.{search_method} with args: {search_args}\")\n docs = getattr(vector_store, search_method)(**search_args)\n except Exception as e:\n msg = f\"Error performing {search_method} in AstraDBVectorStore: {e}\"\n raise ValueError(msg) from e\n\n self.log(f\"Retrieved documents: {len(docs)}\")\n\n data = docs_to_data(docs)\n self.log(f\"Converted documents to data: {len(data)}\")\n self.status = data\n\n return data\n\n def get_retriever_kwargs(self):\n search_args = self._build_search_args()\n\n return {\n \"search_type\": self._map_search_type(),\n \"search_kwargs\": search_args,\n }\n" }, "collection_name": { "_input_type": "DropdownInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 969918dc3871..398a8d1b0151 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -26,9 +26,9 @@ "id": "reactflow__edge-ChatInput-zueUA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-zueUAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-oDuVT{œfieldNameœ:œquestionœ,œidœ:œPrompt-oDuVTœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, "source": "ChatInput-zueUA", - "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-zueUAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-zueUAœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", "target": "Prompt-oDuVT", - "targetHandle": "{œfieldNameœ:œquestionœ,œidœ:œPrompt-oDuVTœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}" + "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-oDuVTœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -55,9 +55,9 @@ "id": "reactflow__edge-parser-CbWBG{œdataTypeœ:œparserœ,œidœ:œparser-CbWBGœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-oDuVT{œfieldNameœ:œcontextœ,œidœ:œPrompt-oDuVTœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, "source": "parser-CbWBG", - "sourceHandle": "{œdataTypeœ:œparserœ,œidœ:œparser-CbWBGœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-CbWBGœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", "target": "Prompt-oDuVT", - "targetHandle": "{œfieldNameœ:œcontextœ,œidœ:œPrompt-oDuVTœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}" + "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-oDuVTœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -85,9 +85,9 @@ "id": "reactflow__edge-File-PvlCh{œdataTypeœ:œFileœ,œidœ:œFile-PvlChœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-SplitText-l3kJG{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-l3kJGœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", "selected": false, "source": "File-PvlCh", - "sourceHandle": "{œdataTypeœ:œFileœ,œidœ:œFile-PvlChœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-PvlChœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", "target": "SplitText-l3kJG", - "targetHandle": "{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-l3kJGœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" + "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-l3kJGœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -113,9 +113,9 @@ "id": "reactflow__edge-Prompt-oDuVT{œdataTypeœ:œPromptœ,œidœ:œPrompt-oDuVTœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-LanguageModelComponent-9vLb9{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-9vLb9œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, "source": "Prompt-oDuVT", - "sourceHandle": "{œdataTypeœ:œPromptœ,œidœ:œPrompt-oDuVTœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-oDuVTœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", "target": "LanguageModelComponent-9vLb9", - "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-9vLb9œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œLanguageModelComponent-9vLb9œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -143,9 +143,9 @@ "id": "reactflow__edge-LanguageModelComponent-9vLb9{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-9vLb9œ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-NH6u1{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-NH6u1œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}", "selected": false, "source": "LanguageModelComponent-9vLb9", - "sourceHandle": "{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-9vLb9œ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-9vLb9œ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", "target": "ChatOutput-NH6u1", - "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-NH6u1œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}" + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-NH6u1œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -172,9 +172,9 @@ "id": "xy-edge__SplitText-l3kJG{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-l3kJGœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-AstraDB-s5fQW{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-s5fQWœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", "selected": false, "source": "SplitText-l3kJG", - "sourceHandle": "{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-l3kJGœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}", + "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-l3kJGœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", "target": "AstraDB-s5fQW", - "targetHandle": "{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-s5fQWœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}" + "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-s5fQWœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" }, { "className": "", @@ -198,9 +198,9 @@ }, "id": "xy-edge__ChatInput-zueUA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-zueUAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-WAq6g{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-WAq6gœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", "source": "ChatInput-zueUA", - "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-zueUAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-zueUAœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", "target": "AstraDB-WAq6g", - "targetHandle": "{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-WAq6gœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}" + "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-WAq6gœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}" }, { "className": "", @@ -225,9 +225,9 @@ }, "id": "xy-edge__AstraDB-WAq6g{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-WAq6gœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-parser-CbWBG{œfieldNameœ:œinput_dataœ,œidœ:œparser-CbWBGœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "source": "AstraDB-WAq6g", - "sourceHandle": "{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-WAq6gœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}", + "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-WAq6gœ, œnameœ: œsearch_resultsœ, œoutput_typesœ: [œDataœ]}", "target": "parser-CbWBG", - "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œparser-CbWBGœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" + "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œparser-CbWBGœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" } ], "nodes": [ @@ -3326,8 +3326,6 @@ "group_outputs": false, "method": "search_documents", "name": "search_results", - "options": null, - "required_inputs": null, "selected": "Data", "tool_mode": true, "types": [ @@ -3342,8 +3340,6 @@ "group_outputs": false, "method": "as_dataframe", "name": "dataframe", - "options": null, - "required_inputs": null, "selected": "DataFrame", "tool_mode": true, "types": [ @@ -3359,8 +3355,6 @@ "hidden": true, "method": "as_vector_store", "name": "vectorstoreconnection", - "options": null, - "required_inputs": null, "selected": "VectorStore", "tool_mode": true, "types": [ @@ -4138,8 +4132,6 @@ "group_outputs": false, "method": "search_documents", "name": "search_results", - "options": null, - "required_inputs": null, "selected": "Data", "tool_mode": true, "types": [ @@ -4154,8 +4146,6 @@ "group_outputs": false, "method": "as_dataframe", "name": "dataframe", - "options": null, - "required_inputs": null, "selected": "DataFrame", "tool_mode": true, "types": [ @@ -4171,8 +4161,6 @@ "hidden": true, "method": "as_vector_store", "name": "vectorstoreconnection", - "options": null, - "required_inputs": null, "selected": "VectorStore", "tool_mode": true, "types": [ From 5fa626bcc533db1bda7efbbf0c41b9e5ffa9cbb1 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Tue, 26 Aug 2025 09:47:10 -0700 Subject: [PATCH 3/4] Update Vector Store RAG.json --- .../starter_projects/Vector Store RAG.json | 218 +++++++++--------- 1 file changed, 115 insertions(+), 103 deletions(-) diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 398a8d1b0151..8d9d4f6a8dde 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -7,7 +7,7 @@ "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-zueUA", + "id": "ChatInput-w1dxV", "name": "message", "output_types": [ "Message" @@ -15,7 +15,7 @@ }, "targetHandle": { "fieldName": "question", - "id": "Prompt-oDuVT", + "id": "Prompt-jBCaI", "inputTypes": [ "Message", "Text" @@ -23,12 +23,12 @@ "type": "str" } }, - "id": "reactflow__edge-ChatInput-zueUA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-zueUAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-oDuVT{œfieldNameœ:œquestionœ,œidœ:œPrompt-oDuVTœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-ChatInput-w1dxV{œdataTypeœ:œChatInputœ,œidœ:œChatInput-w1dxVœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-jBCaI{œfieldNameœ:œquestionœ,œidœ:œPrompt-jBCaIœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, - "source": "ChatInput-zueUA", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-zueUAœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-oDuVT", - "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-oDuVTœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" + "source": "ChatInput-w1dxV", + "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-w1dxVœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "target": "Prompt-jBCaI", + "targetHandle": "{œfieldNameœ:œquestionœ,œidœ:œPrompt-jBCaIœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}" }, { "animated": false, @@ -36,7 +36,7 @@ "data": { "sourceHandle": { "dataType": "parser", - "id": "parser-CbWBG", + "id": "parser-LlSmH", "name": "parsed_text", "output_types": [ "Message" @@ -44,7 +44,7 @@ }, "targetHandle": { "fieldName": "context", - "id": "Prompt-oDuVT", + "id": "Prompt-jBCaI", "inputTypes": [ "Message", "Text" @@ -52,12 +52,12 @@ "type": "str" } }, - "id": "reactflow__edge-parser-CbWBG{œdataTypeœ:œparserœ,œidœ:œparser-CbWBGœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-oDuVT{œfieldNameœ:œcontextœ,œidœ:œPrompt-oDuVTœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-parser-LlSmH{œdataTypeœ:œparserœ,œidœ:œparser-LlSmHœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-jBCaI{œfieldNameœ:œcontextœ,œidœ:œPrompt-jBCaIœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, - "source": "parser-CbWBG", - "sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-CbWBGœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-oDuVT", - "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-oDuVTœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" + "source": "parser-LlSmH", + "sourceHandle": "{œdataTypeœ:œparserœ,œidœ:œparser-LlSmHœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "target": "Prompt-jBCaI", + "targetHandle": "{œfieldNameœ:œcontextœ,œidœ:œPrompt-jBCaIœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}" }, { "animated": false, @@ -65,7 +65,7 @@ "data": { "sourceHandle": { "dataType": "File", - "id": "File-PvlCh", + "id": "File-TyJTr", "name": "message", "output_types": [ "Message" @@ -73,7 +73,7 @@ }, "targetHandle": { "fieldName": "data_inputs", - "id": "SplitText-l3kJG", + "id": "SplitText-YWbhC", "inputTypes": [ "Data", "DataFrame", @@ -82,12 +82,12 @@ "type": "other" } }, - "id": "reactflow__edge-File-PvlCh{œdataTypeœ:œFileœ,œidœ:œFile-PvlChœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-SplitText-l3kJG{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-l3kJGœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-File-TyJTr{œdataTypeœ:œFileœ,œidœ:œFile-TyJTrœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-SplitText-YWbhC{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-YWbhCœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", "selected": false, - "source": "File-PvlCh", - "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-PvlChœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "SplitText-l3kJG", - "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-l3kJGœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}" + "source": "File-TyJTr", + "sourceHandle": "{œdataTypeœ:œFileœ,œidœ:œFile-TyJTrœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "target": "SplitText-YWbhC", + "targetHandle": "{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-YWbhCœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" }, { "animated": false, @@ -95,7 +95,7 @@ "data": { "sourceHandle": { "dataType": "Prompt", - "id": "Prompt-oDuVT", + "id": "Prompt-jBCaI", "name": "prompt", "output_types": [ "Message" @@ -103,19 +103,19 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "LanguageModelComponent-9vLb9", + "id": "LanguageModelComponent-XEWmQ", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-Prompt-oDuVT{œdataTypeœ:œPromptœ,œidœ:œPrompt-oDuVTœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-LanguageModelComponent-9vLb9{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-9vLb9œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-Prompt-jBCaI{œdataTypeœ:œPromptœ,œidœ:œPrompt-jBCaIœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-LanguageModelComponent-XEWmQ{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-XEWmQœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "Prompt-oDuVT", - "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-oDuVTœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", - "target": "LanguageModelComponent-9vLb9", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œLanguageModelComponent-9vLb9œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "Prompt-jBCaI", + "sourceHandle": "{œdataTypeœ:œPromptœ,œidœ:œPrompt-jBCaIœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}", + "target": "LanguageModelComponent-XEWmQ", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-XEWmQœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" }, { "animated": false, @@ -123,7 +123,7 @@ "data": { "sourceHandle": { "dataType": "LanguageModelComponent", - "id": "LanguageModelComponent-9vLb9", + "id": "LanguageModelComponent-XEWmQ", "name": "text_output", "output_types": [ "Message" @@ -131,7 +131,7 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "ChatOutput-NH6u1", + "id": "ChatOutput-jIJTT", "inputTypes": [ "Data", "DataFrame", @@ -140,12 +140,12 @@ "type": "str" } }, - "id": "reactflow__edge-LanguageModelComponent-9vLb9{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-9vLb9œ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-NH6u1{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-NH6u1œ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-LanguageModelComponent-XEWmQ{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-XEWmQœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-jIJTT{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-jIJTTœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "LanguageModelComponent-9vLb9", - "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-9vLb9œ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", - "target": "ChatOutput-NH6u1", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-NH6u1œ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}" + "source": "LanguageModelComponent-XEWmQ", + "sourceHandle": "{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-XEWmQœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}", + "target": "ChatOutput-jIJTT", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-jIJTTœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}" }, { "animated": false, @@ -153,7 +153,7 @@ "data": { "sourceHandle": { "dataType": "SplitText", - "id": "SplitText-l3kJG", + "id": "SplitText-YWbhC", "name": "dataframe", "output_types": [ "DataFrame" @@ -161,7 +161,7 @@ }, "targetHandle": { "fieldName": "ingest_data", - "id": "AstraDB-s5fQW", + "id": "AstraDB-92S9t", "inputTypes": [ "Data", "DataFrame" @@ -169,19 +169,20 @@ "type": "other" } }, - "id": "xy-edge__SplitText-l3kJG{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-l3kJGœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-AstraDB-s5fQW{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-s5fQWœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-SplitText-YWbhC{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-YWbhCœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-AstraDB-92S9t{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-92S9tœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", "selected": false, - "source": "SplitText-l3kJG", - "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-l3kJGœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", - "target": "AstraDB-s5fQW", - "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-s5fQWœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" + "source": "SplitText-YWbhC", + "sourceHandle": "{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-YWbhCœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}", + "target": "AstraDB-92S9t", + "targetHandle": "{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-92S9tœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}" }, { + "animated": false, "className": "", "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-zueUA", + "id": "ChatInput-w1dxV", "name": "message", "output_types": [ "Message" @@ -189,33 +190,34 @@ }, "targetHandle": { "fieldName": "search_query", - "id": "AstraDB-WAq6g", + "id": "AstraDB-Lm2es", "inputTypes": [ "Message" ], "type": "query" } }, - "id": "xy-edge__ChatInput-zueUA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-zueUAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-WAq6g{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-WAq6gœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", - "source": "ChatInput-zueUA", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-zueUAœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "AstraDB-WAq6g", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-WAq6gœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}" + "id": "reactflow__edge-ChatInput-w1dxV{œdataTypeœ:œChatInputœ,œidœ:œChatInput-w1dxVœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-Lm2es{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-Lm2esœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", + "selected": false, + "source": "ChatInput-w1dxV", + "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-w1dxVœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "target": "AstraDB-Lm2es", + "targetHandle": "{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-Lm2esœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}" }, { "className": "", "data": { "sourceHandle": { "dataType": "AstraDB", - "id": "AstraDB-WAq6g", - "name": "search_results", + "id": "AstraDB-Lm2es", + "name": "dataframe", "output_types": [ - "Data" + "DataFrame" ] }, "targetHandle": { "fieldName": "input_data", - "id": "parser-CbWBG", + "id": "parser-LlSmH", "inputTypes": [ "DataFrame", "Data" @@ -223,11 +225,11 @@ "type": "other" } }, - "id": "xy-edge__AstraDB-WAq6g{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-WAq6gœ,œnameœ:œsearch_resultsœ,œoutput_typesœ:[œDataœ]}-parser-CbWBG{œfieldNameœ:œinput_dataœ,œidœ:œparser-CbWBGœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", - "source": "AstraDB-WAq6g", - "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-WAq6gœ, œnameœ: œsearch_resultsœ, œoutput_typesœ: [œDataœ]}", - "target": "parser-CbWBG", - "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œparser-CbWBGœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" + "id": "xy-edge__AstraDB-Lm2es{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-Lm2esœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-parser-LlSmH{œfieldNameœ:œinput_dataœ,œidœ:œparser-LlSmHœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "source": "AstraDB-Lm2es", + "sourceHandle": "{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-Lm2esœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}", + "target": "parser-LlSmH", + "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œparser-LlSmHœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" } ], "nodes": [ @@ -235,7 +237,7 @@ "data": { "description": "Get chat inputs from the Playground.", "display_name": "Chat Input", - "id": "ChatInput-zueUA", + "id": "ChatInput-w1dxV", "node": { "base_classes": [ "Message" @@ -514,7 +516,7 @@ }, "dragging": false, "height": 234, - "id": "ChatInput-zueUA", + "id": "ChatInput-w1dxV", "measured": { "height": 234, "width": 320 @@ -535,7 +537,7 @@ "data": { "description": "Create a prompt template with dynamic variables.", "display_name": "Prompt", - "id": "Prompt-oDuVT", + "id": "Prompt-jBCaI", "node": { "base_classes": [ "Message" @@ -699,7 +701,7 @@ }, "dragging": false, "height": 433, - "id": "Prompt-oDuVT", + "id": "Prompt-jBCaI", "measured": { "height": 433, "width": 320 @@ -720,7 +722,7 @@ "data": { "description": "Split text into chunks based on specified criteria.", "display_name": "Split Text", - "id": "SplitText-l3kJG", + "id": "SplitText-YWbhC", "node": { "base_classes": [ "Data" @@ -922,7 +924,7 @@ }, "dragging": false, "height": 475, - "id": "SplitText-l3kJG", + "id": "SplitText-YWbhC", "measured": { "height": 475, "width": 320 @@ -941,7 +943,7 @@ }, { "data": { - "id": "note-XRjSv", + "id": "note-vztSC", "node": { "description": "## 🐕 2. Retriever Flow\n\nThis flow answers your questions with contextual data retrieved from your vector database.\n\nOpen the **Playground** and ask, \n\n```\nWhat is this document about?\n```\n", "display_name": "", @@ -954,7 +956,7 @@ }, "dragging": false, "height": 324, - "id": "note-XRjSv", + "id": "note-vztSC", "measured": { "height": 324, "width": 324 @@ -978,7 +980,7 @@ }, { "data": { - "id": "note-Z0mzM", + "id": "note-4b71P", "node": { "description": "Retrieval Augmented Generation (RAG) is a way of providing additional context to a Large Language Model (LLM) by preloading a vector database with embeddings for relevant content. When a user chats with the LLM, a _similarity search_ retrieves relevant content by comparing an embedding for the user's query against the embeddings in the vector database.\nFor example, a RAG chatbot could be pre-loaded with product data, and then it can help customers find specific products based on their queries.\nThis template has two sub-flows. One flow loads data into your vector store, and the other is the user-driven chat flow that compares a new query against the existing content in your vector database.\n\n## Quickstart\n1. Add your OpenAI API key to the **Language Model** component and the two **Embeddings** components.\n2. Add an Astra application token to the **Astra DB** vector store components, or replace these components with other vector store components available in the **Components** menu.\n**💡 Store your credentials as Langflow global variables 🌐 to simplify token management and reuse in your flows.**\n\n## Run the flows\n1. Load your data into a vector database with the 📚 **Load Data** flow. Select a file to upload in the **File** component, and then click **Play** ▶️ on the **Astra DB** component to run the **Load Data** flow.\n2. Open the **Playground** to start a chat with the 🐕 **Retriever** flow.\n\nOnly the run the **Load Data** flow when you need to populate your vector database with baseline content, such as product data.\nThe **Retriever** flow is the user-facing chat flow. This flow generates an embedding from chat input, runs a similarity search against the vector database to retrieve relevant content, and then passes the original query and the retrieved content to the LLM, which produces the chat response sent to the user.\n\n## Next steps\nExperiment by changing the prompt and the loaded data to see how the LLM's responses change.", "display_name": "Read Me", @@ -991,7 +993,7 @@ }, "dragging": false, "height": 556, - "id": "note-Z0mzM", + "id": "note-4b71P", "measured": { "height": 556, "width": 389 @@ -1017,7 +1019,7 @@ "data": { "description": "Display a chat message in the Playground.", "display_name": "Chat Output", - "id": "ChatOutput-NH6u1", + "id": "ChatOutput-jIJTT", "node": { "base_classes": [ "Message" @@ -1320,7 +1322,7 @@ }, "dragging": false, "height": 234, - "id": "ChatOutput-NH6u1", + "id": "ChatOutput-jIJTT", "measured": { "height": 234, "width": 320 @@ -1339,7 +1341,7 @@ }, { "data": { - "id": "OpenAIEmbeddings-3lfxG", + "id": "OpenAIEmbeddings-d0XPm", "node": { "base_classes": [ "Embeddings" @@ -1832,7 +1834,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-3lfxG", + "id": "OpenAIEmbeddings-d0XPm", "measured": { "height": 320, "width": 320 @@ -1851,7 +1853,7 @@ }, { "data": { - "id": "note-nD1ad", + "id": "note-AeLDI", "node": { "description": "## 📚 1. Load Data Flow\n\nRun this first! Load data from a local file and embed it into the vector database.\n\nSelect a Database and a Collection, or create new ones. \n\nClick **Run component** on the **Astra DB** component to load your data.\n\n\n### Next steps:\n Experiment by changing the prompt and the contextual data to see how the retrieval flow's responses change.", "display_name": "", @@ -1864,7 +1866,7 @@ }, "dragging": false, "height": 460, - "id": "note-nD1ad", + "id": "note-AeLDI", "measured": { "height": 460, "width": 340 @@ -1888,7 +1890,7 @@ }, { "data": { - "id": "OpenAIEmbeddings-3XP2J", + "id": "OpenAIEmbeddings-8Zseb", "node": { "base_classes": [ "Embeddings" @@ -2381,7 +2383,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-3XP2J", + "id": "OpenAIEmbeddings-8Zseb", "measured": { "height": 320, "width": 320 @@ -2400,7 +2402,7 @@ }, { "data": { - "id": "note-Yb8sb", + "id": "note-Dkwdo", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2413,7 +2415,7 @@ }, "dragging": false, "height": 324, - "id": "note-Yb8sb", + "id": "note-Dkwdo", "measured": { "height": 324, "width": 324 @@ -2432,7 +2434,7 @@ }, { "data": { - "id": "note-jdw5F", + "id": "note-YLhWR", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2445,7 +2447,7 @@ }, "dragging": false, "height": 324, - "id": "note-jdw5F", + "id": "note-YLhWR", "measured": { "height": 324, "width": 324 @@ -2464,7 +2466,7 @@ }, { "data": { - "id": "note-Fps8P", + "id": "note-Il7AR", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2477,7 +2479,7 @@ }, "dragging": false, "height": 324, - "id": "note-Fps8P", + "id": "note-Il7AR", "measured": { "height": 324, "width": 324 @@ -2496,7 +2498,7 @@ }, { "data": { - "id": "parser-CbWBG", + "id": "parser-LlSmH", "node": { "base_classes": [ "Message" @@ -2658,7 +2660,7 @@ "type": "parser" }, "dragging": false, - "id": "parser-CbWBG", + "id": "parser-LlSmH", "measured": { "height": 361, "width": 320 @@ -2672,7 +2674,7 @@ }, { "data": { - "id": "File-PvlCh", + "id": "File-TyJTr", "node": { "base_classes": [ "Message" @@ -2697,7 +2699,7 @@ ], "frozen": false, "icon": "file-text", - "last_updated": "2025-08-26T16:30:56.653Z", + "last_updated": "2025-08-26T16:46:21.648Z", "legacy": false, "metadata": {}, "minimized": false, @@ -2941,7 +2943,7 @@ "type": "File" }, "dragging": false, - "id": "File-PvlCh", + "id": "File-TyJTr", "measured": { "height": 230, "width": 320 @@ -2955,7 +2957,7 @@ }, { "data": { - "id": "LanguageModelComponent-9vLb9", + "id": "LanguageModelComponent-XEWmQ", "node": { "base_classes": [ "LanguageModel", @@ -2979,7 +2981,7 @@ ], "frozen": false, "icon": "brain-circuit", - "last_updated": "2025-08-26T16:30:56.494Z", + "last_updated": "2025-08-26T16:46:21.523Z", "legacy": false, "metadata": { "keywords": [ @@ -3235,7 +3237,7 @@ "type": "LanguageModelComponent" }, "dragging": false, - "id": "LanguageModelComponent-9vLb9", + "id": "LanguageModelComponent-XEWmQ", "measured": { "height": 451, "width": 320 @@ -3249,7 +3251,7 @@ }, { "data": { - "id": "AstraDB-s5fQW", + "id": "AstraDB-92S9t", "node": { "base_classes": [ "Data", @@ -3289,7 +3291,7 @@ ], "frozen": false, "icon": "AstraDB", - "last_updated": "2025-08-26T16:31:17.471Z", + "last_updated": "2025-08-26T16:46:21.524Z", "legacy": false, "metadata": { "code_hash": "23fbe9daca09", @@ -3326,6 +3328,8 @@ "group_outputs": false, "method": "search_documents", "name": "search_results", + "options": null, + "required_inputs": null, "selected": "Data", "tool_mode": true, "types": [ @@ -3340,6 +3344,8 @@ "group_outputs": false, "method": "as_dataframe", "name": "dataframe", + "options": null, + "required_inputs": null, "selected": "DataFrame", "tool_mode": true, "types": [ @@ -3355,6 +3361,8 @@ "hidden": true, "method": "as_vector_store", "name": "vectorstoreconnection", + "options": null, + "required_inputs": null, "selected": "VectorStore", "tool_mode": true, "types": [ @@ -4041,7 +4049,7 @@ "type": "AstraDB" }, "dragging": false, - "id": "AstraDB-s5fQW", + "id": "AstraDB-92S9t", "measured": { "height": 457, "width": 320 @@ -4055,7 +4063,7 @@ }, { "data": { - "id": "AstraDB-WAq6g", + "id": "AstraDB-Lm2es", "node": { "base_classes": [ "Data", @@ -4095,7 +4103,7 @@ ], "frozen": false, "icon": "AstraDB", - "last_updated": "2025-08-26T16:31:59.909Z", + "last_updated": "2025-08-26T16:46:21.525Z", "legacy": false, "metadata": { "code_hash": "23fbe9daca09", @@ -4132,7 +4140,8 @@ "group_outputs": false, "method": "search_documents", "name": "search_results", - "selected": "Data", + "options": null, + "required_inputs": null, "tool_mode": true, "types": [ "Data" @@ -4146,6 +4155,8 @@ "group_outputs": false, "method": "as_dataframe", "name": "dataframe", + "options": null, + "required_inputs": null, "selected": "DataFrame", "tool_mode": true, "types": [ @@ -4161,7 +4172,8 @@ "hidden": true, "method": "as_vector_store", "name": "vectorstoreconnection", - "selected": "VectorStore", + "options": null, + "required_inputs": null, "tool_mode": true, "types": [ "VectorStore" @@ -4842,12 +4854,12 @@ }, "tool_mode": false }, - "selected_output": "search_results", + "selected_output": "dataframe", "showNode": true, "type": "AstraDB" }, "dragging": false, - "id": "AstraDB-WAq6g", + "id": "AstraDB-Lm2es", "measured": { "height": 457, "width": 320 @@ -4861,14 +4873,14 @@ } ], "viewport": { - "x": -129.50701026456488, - "y": -80.5782691570264, - "zoom": 0.46801321898737636 + "x": 100.76434400868948, + "y": -144.93713359276444, + "zoom": 0.43125741996443906 } }, "description": "Load your data for chat context with Retrieval Augmented Generation.", "endpoint_name": null, - "id": "b59cc5b1-9dae-4064-bfea-08a20b9d7080", + "id": "3a54a0a2-dfbf-47b7-84f6-ef5ad837944c", "is_component": false, "last_tested_version": "1.5.0.post2", "name": "Vector Store RAG", From 187959d00681d38eeedf276f4d697e887e676003 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 26 Aug 2025 16:49:19 +0000 Subject: [PATCH 4/4] [autofix.ci] apply automated fixes --- .../starter_projects/Vector Store RAG.json | 50 ++++++++----------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 8d9d4f6a8dde..63810f0a2f39 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -26,9 +26,9 @@ "id": "reactflow__edge-ChatInput-w1dxV{œdataTypeœ:œChatInputœ,œidœ:œChatInput-w1dxVœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-jBCaI{œfieldNameœ:œquestionœ,œidœ:œPrompt-jBCaIœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, "source": "ChatInput-w1dxV", - "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-w1dxVœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-w1dxVœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", "target": "Prompt-jBCaI", - "targetHandle": "{œfieldNameœ:œquestionœ,œidœ:œPrompt-jBCaIœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}" + "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-jBCaIœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -55,9 +55,9 @@ "id": "reactflow__edge-parser-LlSmH{œdataTypeœ:œparserœ,œidœ:œparser-LlSmHœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-jBCaI{œfieldNameœ:œcontextœ,œidœ:œPrompt-jBCaIœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, "source": "parser-LlSmH", - "sourceHandle": "{œdataTypeœ:œparserœ,œidœ:œparser-LlSmHœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-LlSmHœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", "target": "Prompt-jBCaI", - "targetHandle": "{œfieldNameœ:œcontextœ,œidœ:œPrompt-jBCaIœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}" + "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-jBCaIœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -85,9 +85,9 @@ "id": "reactflow__edge-File-TyJTr{œdataTypeœ:œFileœ,œidœ:œFile-TyJTrœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-SplitText-YWbhC{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-YWbhCœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", "selected": false, "source": "File-TyJTr", - "sourceHandle": "{œdataTypeœ:œFileœ,œidœ:œFile-TyJTrœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-TyJTrœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", "target": "SplitText-YWbhC", - "targetHandle": "{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-YWbhCœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" + "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-YWbhCœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -113,9 +113,9 @@ "id": "reactflow__edge-Prompt-jBCaI{œdataTypeœ:œPromptœ,œidœ:œPrompt-jBCaIœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-LanguageModelComponent-XEWmQ{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-XEWmQœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, "source": "Prompt-jBCaI", - "sourceHandle": "{œdataTypeœ:œPromptœ,œidœ:œPrompt-jBCaIœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-jBCaIœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", "target": "LanguageModelComponent-XEWmQ", - "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-XEWmQœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œLanguageModelComponent-XEWmQœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -143,9 +143,9 @@ "id": "reactflow__edge-LanguageModelComponent-XEWmQ{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-XEWmQœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-jIJTT{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-jIJTTœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}", "selected": false, "source": "LanguageModelComponent-XEWmQ", - "sourceHandle": "{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-XEWmQœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œLanguageModelComponentœ, œidœ: œLanguageModelComponent-XEWmQœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", "target": "ChatOutput-jIJTT", - "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-jIJTTœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}" + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-jIJTTœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -172,9 +172,9 @@ "id": "reactflow__edge-SplitText-YWbhC{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-YWbhCœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-AstraDB-92S9t{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-92S9tœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", "selected": false, "source": "SplitText-YWbhC", - "sourceHandle": "{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-YWbhCœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}", + "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-YWbhCœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", "target": "AstraDB-92S9t", - "targetHandle": "{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-92S9tœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}" + "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-92S9tœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -200,9 +200,9 @@ "id": "reactflow__edge-ChatInput-w1dxV{œdataTypeœ:œChatInputœ,œidœ:œChatInput-w1dxVœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-Lm2es{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-Lm2esœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", "selected": false, "source": "ChatInput-w1dxV", - "sourceHandle": "{œdataTypeœ:œChatInputœ,œidœ:œChatInput-w1dxVœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-w1dxVœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", "target": "AstraDB-Lm2es", - "targetHandle": "{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-Lm2esœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}" + "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-Lm2esœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}" }, { "className": "", @@ -227,9 +227,9 @@ }, "id": "xy-edge__AstraDB-Lm2es{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-Lm2esœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-parser-LlSmH{œfieldNameœ:œinput_dataœ,œidœ:œparser-LlSmHœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "source": "AstraDB-Lm2es", - "sourceHandle": "{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-Lm2esœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}", + "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-Lm2esœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", "target": "parser-LlSmH", - "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œparser-LlSmHœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" + "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œparser-LlSmHœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" } ], "nodes": [ @@ -3328,9 +3328,7 @@ "group_outputs": false, "method": "search_documents", "name": "search_results", - "options": null, - "required_inputs": null, - "selected": "Data", + "selected": null, "tool_mode": true, "types": [ "Data" @@ -3344,8 +3342,6 @@ "group_outputs": false, "method": "as_dataframe", "name": "dataframe", - "options": null, - "required_inputs": null, "selected": "DataFrame", "tool_mode": true, "types": [ @@ -3361,9 +3357,7 @@ "hidden": true, "method": "as_vector_store", "name": "vectorstoreconnection", - "options": null, - "required_inputs": null, - "selected": "VectorStore", + "selected": null, "tool_mode": true, "types": [ "VectorStore" @@ -4140,8 +4134,7 @@ "group_outputs": false, "method": "search_documents", "name": "search_results", - "options": null, - "required_inputs": null, + "selected": null, "tool_mode": true, "types": [ "Data" @@ -4155,8 +4148,6 @@ "group_outputs": false, "method": "as_dataframe", "name": "dataframe", - "options": null, - "required_inputs": null, "selected": "DataFrame", "tool_mode": true, "types": [ @@ -4172,8 +4163,7 @@ "hidden": true, "method": "as_vector_store", "name": "vectorstoreconnection", - "options": null, - "required_inputs": null, + "selected": null, "tool_mode": true, "types": [ "VectorStore"