diff --git a/src/backend/base/langflow/initial_setup/starter_projects/AccumulatorCheckAgent.json b/src/backend/base/langflow/initial_setup/starter_projects/AccumulatorCheckAgent.json index fe3d1d8e3c1d..9f74ef6d7aa9 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/AccumulatorCheckAgent.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/AccumulatorCheckAgent.json @@ -2784,7 +2784,7 @@ "last_tested_version": "1.6.3", "name": "Accumulator Check Agent", "tags": [ - "UM", + "utilization-management", "accumulator", "utilization", "reusable", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Auth Guideline.json b/src/backend/base/langflow/initial_setup/starter_projects/Auth Guideline.json index db0d7112eebe..76fd6343597a 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Auth Guideline.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Auth Guideline.json @@ -7,7 +7,7 @@ "data": { "sourceHandle": { "dataType": "TextInput", - "id": "TextInput-JeCKa", + "id": "TextInput-86Rvw", "name": "text", "output_types": [ "Message" @@ -15,19 +15,19 @@ }, "targetHandle": { "fieldName": "diagnosis_description", - "id": "Prompt-IL3D0", + "id": "Prompt-miaxU", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-TextInput-JeCKa{œdataTypeœ:œTextInputœ,œidœ:œTextInput-JeCKaœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-IL3D0{œfieldNameœ:œdiagnosis_descriptionœ,œidœ:œPrompt-IL3D0œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-TextInput-86Rvw{œdataTypeœ:œTextInputœ,œidœ:œTextInput-86Rvwœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-miaxU{œfieldNameœ:œdiagnosis_descriptionœ,œidœ:œPrompt-miaxUœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "TextInput-JeCKa", - "sourceHandle": "{œdataTypeœ: œTextInputœ, œidœ: œTextInput-JeCKaœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-IL3D0", - "targetHandle": "{œfieldNameœ: œdiagnosis_descriptionœ, œidœ: œPrompt-IL3D0œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "TextInput-86Rvw", + "sourceHandle": "{œdataTypeœ: œTextInputœ, œidœ: œTextInput-86Rvwœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-miaxU", + "targetHandle": "{œfieldNameœ: œdiagnosis_descriptionœ, œidœ: œPrompt-miaxUœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -35,7 +35,7 @@ "data": { "sourceHandle": { "dataType": "TextInput", - "id": "TextInput-wrMMy", + "id": "TextInput-ua4NV", "name": "text", "output_types": [ "Message" @@ -43,19 +43,19 @@ }, "targetHandle": { "fieldName": "procedure_description", - "id": "Prompt-IL3D0", + "id": "Prompt-miaxU", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-TextInput-wrMMy{œdataTypeœ:œTextInputœ,œidœ:œTextInput-wrMMyœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-IL3D0{œfieldNameœ:œprocedure_descriptionœ,œidœ:œPrompt-IL3D0œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-TextInput-ua4NV{œdataTypeœ:œTextInputœ,œidœ:œTextInput-ua4NVœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-miaxU{œfieldNameœ:œprocedure_descriptionœ,œidœ:œPrompt-miaxUœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "TextInput-wrMMy", - "sourceHandle": "{œdataTypeœ: œTextInputœ, œidœ: œTextInput-wrMMyœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-IL3D0", - "targetHandle": "{œfieldNameœ: œprocedure_descriptionœ, œidœ: œPrompt-IL3D0œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "TextInput-ua4NV", + "sourceHandle": "{œdataTypeœ: œTextInputœ, œidœ: œTextInput-ua4NVœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-miaxU", + "targetHandle": "{œfieldNameœ: œprocedure_descriptionœ, œidœ: œPrompt-miaxUœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -63,7 +63,7 @@ "data": { "sourceHandle": { "dataType": "Prompt", - "id": "Prompt-IL3D0", + "id": "Prompt-miaxU", "name": "prompt", "output_types": [ "Message" @@ -71,19 +71,19 @@ }, "targetHandle": { "fieldName": "search_query", - "id": "KnowledgeHubSearch-JEgXB", + "id": "KnowledgeHubSearch-2yCHC", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-Prompt-IL3D0{œdataTypeœ:œPromptœ,œidœ:œPrompt-IL3D0œ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-KnowledgeHubSearch-JEgXB{œfieldNameœ:œsearch_queryœ,œidœ:œKnowledgeHubSearch-JEgXBœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-Prompt-miaxU{œdataTypeœ:œPromptœ,œidœ:œPrompt-miaxUœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-KnowledgeHubSearch-2yCHC{œfieldNameœ:œsearch_queryœ,œidœ:œKnowledgeHubSearch-2yCHCœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "Prompt-IL3D0", - "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-IL3D0œ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", - "target": "KnowledgeHubSearch-JEgXB", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œKnowledgeHubSearch-JEgXBœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "Prompt-miaxU", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-miaxUœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", + "target": "KnowledgeHubSearch-2yCHC", + "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œKnowledgeHubSearch-2yCHCœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -91,7 +91,7 @@ "data": { "sourceHandle": { "dataType": "KnowledgeHubSearch", - "id": "KnowledgeHubSearch-JEgXB", + "id": "KnowledgeHubSearch-2yCHC", "name": "query_results", "output_types": [ "Data" @@ -99,7 +99,7 @@ }, "targetHandle": { "fieldName": "input_data", - "id": "ParserComponent-53Zh2", + "id": "ParserComponent-BxCXL", "inputTypes": [ "DataFrame", "Data" @@ -107,12 +107,12 @@ "type": "other" } }, - "id": "reactflow__edge-KnowledgeHubSearch-JEgXB{œdataTypeœ:œKnowledgeHubSearchœ,œidœ:œKnowledgeHubSearch-JEgXBœ,œnameœ:œquery_resultsœ,œoutput_typesœ:[œDataœ]}-ParserComponent-53Zh2{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-53Zh2œ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-KnowledgeHubSearch-2yCHC{œdataTypeœ:œKnowledgeHubSearchœ,œidœ:œKnowledgeHubSearch-2yCHCœ,œnameœ:œquery_resultsœ,œoutput_typesœ:[œDataœ]}-ParserComponent-BxCXL{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-BxCXLœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "selected": false, - "source": "KnowledgeHubSearch-JEgXB", - "sourceHandle": "{œdataTypeœ: œKnowledgeHubSearchœ, œidœ: œKnowledgeHubSearch-JEgXBœ, œnameœ: œquery_resultsœ, œoutput_typesœ: [œDataœ]}", - "target": "ParserComponent-53Zh2", - "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-53Zh2œ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" + "source": "KnowledgeHubSearch-2yCHC", + "sourceHandle": "{œdataTypeœ: œKnowledgeHubSearchœ, œidœ: œKnowledgeHubSearch-2yCHCœ, œnameœ: œquery_resultsœ, œoutput_typesœ: [œDataœ]}", + "target": "ParserComponent-BxCXL", + "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-BxCXLœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -120,7 +120,7 @@ "data": { "sourceHandle": { "dataType": "ParserComponent", - "id": "ParserComponent-53Zh2", + "id": "ParserComponent-BxCXL", "name": "parsed_text", "output_types": [ "Message" @@ -128,19 +128,19 @@ }, "targetHandle": { "fieldName": "context", - "id": "Prompt-IvJup", + "id": "Prompt-kGSQZ", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-ParserComponent-53Zh2{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-53Zh2œ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-IvJup{œfieldNameœ:œcontextœ,œidœ:œPrompt-IvJupœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-ParserComponent-BxCXL{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-BxCXLœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-kGSQZ{œfieldNameœ:œcontextœ,œidœ:œPrompt-kGSQZœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "ParserComponent-53Zh2", - "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-53Zh2œ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-IvJup", - "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-IvJupœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "ParserComponent-BxCXL", + "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-BxCXLœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-kGSQZ", + "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-kGSQZœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -148,7 +148,7 @@ "data": { "sourceHandle": { "dataType": "Prompt", - "id": "Prompt-IL3D0", + "id": "Prompt-miaxU", "name": "prompt", "output_types": [ "Message" @@ -156,19 +156,19 @@ }, "targetHandle": { "fieldName": "question", - "id": "Prompt-IvJup", + "id": "Prompt-kGSQZ", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-Prompt-IL3D0{œdataTypeœ:œPromptœ,œidœ:œPrompt-IL3D0œ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-Prompt-IvJup{œfieldNameœ:œquestionœ,œidœ:œPrompt-IvJupœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-Prompt-miaxU{œdataTypeœ:œPromptœ,œidœ:œPrompt-miaxUœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-Prompt-kGSQZ{œfieldNameœ:œquestionœ,œidœ:œPrompt-kGSQZœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "Prompt-IL3D0", - "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-IL3D0œ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-IvJup", - "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-IvJupœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "Prompt-miaxU", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-miaxUœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-kGSQZ", + "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-kGSQZœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -176,61 +176,61 @@ "data": { "sourceHandle": { "dataType": "Prompt", - "id": "Prompt-IvJup", + "id": "Prompt-kGSQZ", "name": "prompt", "output_types": [ "Message" ] }, "targetHandle": { - "fieldName": "system_prompt", - "id": "Agent-wwvo0", + "fieldName": "input_value", + "id": "AzureOpenAIModel-TegVj", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "xy-edge__Prompt-IvJup{œdataTypeœ:œPromptœ,œidœ:œPrompt-IvJupœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-Agent-wwvo0{œfieldNameœ:œsystem_promptœ,œidœ:œAgent-wwvo0œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-Prompt-kGSQZ{œdataTypeœ:œPromptœ,œidœ:œPrompt-kGSQZœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-AzureOpenAIModel-TegVj{œfieldNameœ:œinput_valueœ,œidœ:œAzureOpenAIModel-TegVjœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "Prompt-IvJup", - "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-IvJupœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", - "target": "Agent-wwvo0", - "targetHandle": "{œfieldNameœ: œsystem_promptœ, œidœ: œAgent-wwvo0œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "Prompt-kGSQZ", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-kGSQZœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", + "target": "AzureOpenAIModel-TegVj", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œAzureOpenAIModel-TegVjœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "Agent", - "id": "Agent-wwvo0", - "name": "response", + "dataType": "AzureOpenAIModel", + "id": "AzureOpenAIModel-TegVj", + "name": "text_output", "output_types": [ "Message" ] }, "targetHandle": { "fieldName": "input_value", - "id": "TextOutput-duFhS", + "id": "TextOutput-eIziv", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "xy-edge__Agent-wwvo0{œdataTypeœ:œAgentœ,œidœ:œAgent-wwvo0œ,œnameœ:œresponseœ,œoutput_typesœ:[œMessageœ]}-TextOutput-duFhS{œfieldNameœ:œinput_valueœ,œidœ:œTextOutput-duFhSœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-AzureOpenAIModel-TegVj{œdataTypeœ:œAzureOpenAIModelœ,œidœ:œAzureOpenAIModel-TegVjœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-TextOutput-eIziv{œfieldNameœ:œinput_valueœ,œidœ:œTextOutput-eIzivœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "Agent-wwvo0", - "sourceHandle": "{œdataTypeœ: œAgentœ, œidœ: œAgent-wwvo0œ, œnameœ: œresponseœ, œoutput_typesœ: [œMessageœ]}", - "target": "TextOutput-duFhS", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œTextOutput-duFhSœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "AzureOpenAIModel-TegVj", + "sourceHandle": "{œdataTypeœ: œAzureOpenAIModelœ, œidœ: œAzureOpenAIModel-TegVjœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", + "target": "TextOutput-eIziv", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œTextOutput-eIzivœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" } ], "nodes": [ { "data": { - "id": "Prompt-IL3D0", + "id": "Prompt-miaxU", "node": { "base_classes": [ "Message" @@ -395,21 +395,22 @@ "showNode": true, "type": "Prompt" }, - "id": "Prompt-IL3D0", + "dragging": false, + "id": "Prompt-miaxU", "measured": { - "height": 418, + "height": 419, "width": 320 }, "position": { - "x": 636.3786498020982, - "y": 195.66691970475517 + "x": 561.1698099779404, + "y": 399.2108494686537 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "TextInput-JeCKa", + "id": "TextInput-86Rvw", "node": { "base_classes": [ "Message" @@ -499,21 +500,22 @@ "showNode": true, "type": "TextInput" }, - "id": "TextInput-JeCKa", + "dragging": false, + "id": "TextInput-86Rvw", "measured": { - "height": 203, + "height": 204, "width": 320 }, "position": { - "x": 86.35391506260675, - "y": 59.731998794766696 + "x": 142.34278464270332, + "y": 382.3294394399319 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "TextInput-wrMMy", + "id": "TextInput-ua4NV", "node": { "base_classes": [ "Message" @@ -603,21 +605,22 @@ "showNode": true, "type": "TextInput" }, - "id": "TextInput-wrMMy", + "dragging": false, + "id": "TextInput-ua4NV", "measured": { - "height": 203, + "height": 204, "width": 320 }, "position": { - "x": -85.01111743951907, - "y": 418.21976793714487 + "x": 136.00779349953325, + "y": 632.038951073729 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "KnowledgeHubSearch-JEgXB", + "id": "KnowledgeHubSearch-2yCHC", "node": { "base_classes": [ "Data" @@ -635,7 +638,7 @@ ], "frozen": false, "icon": "Autonomize", - "last_updated": "2025-10-15T09:51:09.796Z", + "last_updated": "2025-10-29T10:54:22.730Z", "legacy": false, "lf_version": "1.6.3", "metadata": {}, @@ -724,7 +727,7 @@ "trace_as_metadata": true, "type": "str", "value": [ - "CMS - 2024" + "Carelon Guidelines - 2023" ] } }, @@ -733,21 +736,22 @@ "showNode": true, "type": "KnowledgeHubSearch" }, - "id": "KnowledgeHubSearch-JEgXB", + "dragging": false, + "id": "KnowledgeHubSearch-2yCHC", "measured": { "height": 302, "width": 320 }, "position": { - "x": 1075.1498497300454, - "y": -56.481069223916364 + "x": 969.4848959616387, + "y": 324.82388552289945 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "Prompt-IvJup", + "id": "Prompt-kGSQZ", "node": { "base_classes": [ "Message" @@ -912,21 +916,22 @@ "showNode": true, "type": "Prompt" }, - "id": "Prompt-IvJup", + "dragging": false, + "id": "Prompt-kGSQZ", "measured": { - "height": 434, + "height": 435, "width": 320 }, "position": { - "x": 2031.845129782244, - "y": 228.4506613628165 + "x": 1723.4693089356597, + "y": 502.92691424232413 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "ParserComponent-53Zh2", + "id": "ParserComponent-BxCXL", "node": { "base_classes": [ "Message" @@ -1056,7 +1061,7 @@ "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "Text: {text}" + "value": "{text}" }, "sep": { "_input_type": "MessageTextInput", @@ -1088,285 +1093,136 @@ "type": "ParserComponent" }, "dragging": false, - "id": "ParserComponent-53Zh2", + "id": "ParserComponent-BxCXL", "measured": { - "height": 327, + "height": 329, "width": 320 }, "position": { - "x": 1510.3268918831266, - "y": 689.366330242902 + "x": 1331.2810413787931, + "y": 319.78535061206645 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "TextOutput-duFhS", + "id": "AzureOpenAIModel-TegVj", "node": { "base_classes": [ + "LanguageModel", "Message" ], "beta": false, - "category": "outputs", + "category": "models", "conditional_paths": [], "custom_fields": {}, - "description": "Sends text output via API.", - "display_name": "Text Output", - "documentation": "", + "description": "Generate text using Azure OpenAI LLMs.", + "display_name": "Azure OpenAI", + "documentation": "https://python.langchain.com/docs/integrations/llms/azure_openai", "edited": false, "field_order": [ - "input_value" + "input_value", + "system_message", + "stream", + "azure_endpoint", + "azure_deployment", + "api_key", + "api_version", + "temperature", + "max_tokens" ], "frozen": false, - "icon": "type", - "key": "TextOutput", + "icon": "Azure", + "key": "AzureOpenAIModel", "legacy": false, "lf_version": "1.6.3", - "metadata": {}, + "metadata": { + "keywords": [ + "model", + "llm", + "language model", + "large language model" + ] + }, "minimized": false, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Output Text", + "display_name": "Model Response", "group_outputs": false, "method": "text_response", - "name": "text", + "name": "text_output", "selected": "Message", "tool_mode": true, "types": [ "Message" ], "value": "__UNDEFINED__" - } - ], - "pinned": false, - "score": 0.003169567463043492, - "template": { - "_type": "Component", - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "from langflow.base.io.text import TextComponent\nfrom langflow.io import MultilineInput, Output\nfrom langflow.schema.message import Message\n\n\nclass TextOutputComponent(TextComponent):\n display_name = \"Text Output\"\n description = \"Sends text output via API.\"\n documentation: str = \"https://docs.langflow.org/components-io#text-output\"\n icon = \"type\"\n name = \"TextOutput\"\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Text to be passed as output.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n message = Message(\n text=self.input_value,\n )\n self.status = self.input_value\n return message\n" }, - "input_value": { - "_input_type": "MultilineInput", - "advanced": false, - "copy_field": false, - "display_name": "Inputs", - "dynamic": false, - "info": "Text to be passed as output.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "multiline": true, - "name": "input_value", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "Cancer" - } - }, - "tool_mode": false - }, - "showNode": true, - "type": "TextOutput" - }, - "id": "TextOutput-duFhS", - "measured": { - "height": 203, - "width": 320 - }, - "position": { - "x": 3257.661505858083, - "y": 288.83540671710387 - }, - "selected": true, - "type": "genericNode" - }, - { - "data": { - "id": "Agent-wwvo0", - "node": { - "base_classes": [ - "Message" - ], - "beta": false, - "category": "agents", - "conditional_paths": [], - "custom_fields": {}, - "description": "Define the agent's instructions, then enter a task to complete using tools.", - "display_name": "Agent", - "documentation": "https://docs.langflow.org/agents", - "edited": false, - "field_order": [ - "agent_llm", - "max_tokens", - "model_kwargs", - "model_name", - "openai_api_base", - "api_key", - "temperature", - "seed", - "max_retries", - "timeout", - "system_prompt", - "n_messages", - "format_instructions", - "output_schema", - "tools", - "input_value", - "handle_parsing_errors", - "verbose", - "max_iterations", - "agent_description", - "add_current_date_tool" - ], - "frozen": false, - "icon": "bot", - "key": "Agent", - "last_updated": "2025-10-15T09:53:17.927Z", - "legacy": false, - "lf_version": "1.6.3", - "metadata": {}, - "minimized": false, - "output_types": [], - "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Response", + "display_name": "Language Model", "group_outputs": false, - "method": "message_response", - "name": "response", - "options": null, - "required_inputs": null, - "selected": "Message", + "method": "build_model", + "name": "model_output", + "selected": null, "tool_mode": true, "types": [ - "Message" + "LanguageModel" ], "value": "__UNDEFINED__" } ], "pinned": false, - "score": 1.1732828199964098e-19, + "score": 0.003924824467069744, "template": { "_type": "Component", - "add_current_date_tool": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Current Date", + "api_key": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Azure Chat OpenAI API Key", "dynamic": false, - "info": "If true, will add a tool to the agent that returns the current date.", + "info": "", "input_types": [], - "list": false, - "list_add_label": "Add More", - "name": "add_current_date_tool", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "agent_description": { - "_input_type": "MultilineInput", - "advanced": true, - "copy_field": false, - "display_name": "Agent Description [Deprecated]", - "dynamic": false, - "info": "The description of the agent. This is only used when in Tool Mode. Defaults to 'A helpful assistant with access to the following tools:' and tools are added dynamically. This feature is deprecated and will be removed in future versions.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "multiline": true, - "name": "agent_description", + "load_from_db": true, + "name": "api_key", + "password": true, "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, "type": "str", - "value": "A helpful assistant with access to the following tools:" + "value": "" }, - "agent_llm": { + "api_version": { "_input_type": "DropdownInput", "advanced": false, "combobox": false, "dialog_inputs": {}, - "display_name": "Model Provider", + "display_name": "API Version", "dynamic": false, - "external_options": { - "fields": { - "data": { - "node": { - "display_name": "Connect other models", - "icon": "CornerDownLeft", - "name": "connect_other_models" - } - } - } - }, - "info": "The provider of the language model that the agent will use to generate responses.", - "input_types": [], - "name": "agent_llm", + "info": "", + "name": "api_version", "options": [ - "Anthropic", - "Google Generative AI", - "OpenAI", - "Azure OpenAI" - ], - "options_metadata": [ - { - "icon": "Anthropic" - }, - { - "icon": "GoogleGenerativeAI" - }, - { - "icon": "OpenAI" - }, - { - "icon": "Azure" - }, - { - "icon": "brain" - } + "2025-02-01-preview", + "2025-01-01-preview", + "2024-12-01-preview", + "2024-10-01-preview", + "2024-09-01-preview", + "2024-08-01-preview", + "2024-07-01-preview", + "2024-06-01", + "2024-03-01-preview", + "2024-02-15-preview", + "2023-12-01-preview", + "2023-05-15" ], + "options_metadata": [], "placeholder": "", - "real_time_refresh": true, - "refresh_button": false, "required": false, "show": true, "title_case": false, @@ -1374,94 +1230,78 @@ "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "Azure OpenAI" + "value": "2024-08-01-preview" }, - "api_key": { - "_input_type": "SecretStrInput", + "azure_deployment": { + "_input_type": "MessageTextInput", "advanced": false, - "display_name": "OpenAI API Key", + "display_name": "Deployment Name", "dynamic": false, - "info": "The OpenAI API Key to use for the OpenAI model.", - "input_types": [], - "load_from_db": false, - "name": "api_key", - "password": true, - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "type": "str", - "value": null - }, - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", "info": "", + "input_types": [ + "Message" + ], "list": false, + "list_add_label": "Add More", "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, + "name": "azure_deployment", "placeholder": "", "required": true, "show": true, "title_case": false, - "type": "code", - "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import (\n ToolCallingAgentComponent,\n)\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n IntInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\nfrom langflow.custom.default_providers import apply_provider_defaults\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"OpenAI\", \"Azure OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n if \"OpenAI\" in MODEL_PROVIDERS_DICT:\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n else:\n openai_inputs_filtered = []\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST],\n value=\"OpenAI\",\n real_time_refresh=True,\n refresh_button=False,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST if key in MODELS_METADATA]\n + [{\"icon\": \"brain\"}],\n external_options={\n \"fields\": {\n \"data\": {\n \"node\": {\n \"name\": \"connect_other_models\",\n \"display_name\": \"Connect other models\",\n \"icon\": \"CornerDownLeft\",\n }\n }\n },\n },\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\n \"true\",\n \"1\",\n \"t\",\n \"y\",\n \"yes\",\n ]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (\n ExceptionWithMessageError,\n ValueError,\n TypeError,\n RuntimeError,\n ) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (\n ExceptionWithMessageError,\n ValueError,\n TypeError,\n NotImplementedError,\n AttributeError,\n ) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(\n session_id=self.graph.session_id,\n order=\"Ascending\",\n n_messages=self.n_messages,\n )\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n \n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n build_config.update(fields_to_add)\n \n # Apply provider-specific defaults (only for Azure OpenAI currently)\n if field_value == \"Azure OpenAI\":\n build_config = apply_provider_defaults(field_value, build_config)\n \n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n \n elif field_value == \"connect_other_models\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST],\n real_time_refresh=True,\n refresh_button=False,\n input_types=[\"LanguageModel\"],\n placeholder=\"Awaiting model input.\",\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST if key in MODELS_METADATA],\n external_options={\n \"fields\": {\n \"data\": {\n \"node\": {\n \"name\": \"connect_other_models\",\n \"display_name\": \"Connect other models\",\n \"icon\": \"CornerDownLeft\",\n },\n }\n },\n },\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n \n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n \n # Rest of your existing method remains unchanged...\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\",\n tool_description=description,\n callbacks=self.get_langchain_callbacks(),\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n" + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "GPT316k" }, - "format_instructions": { - "_input_type": "MultilineInput", - "advanced": true, - "copy_field": false, - "display_name": "Output Format Instructions", + "azure_endpoint": { + "_input_type": "MessageTextInput", + "advanced": false, + "display_name": "Azure Endpoint", "dynamic": false, - "info": "Generic Template for structured output formatting. Valid only with Structured response.", + "info": "Your Azure endpoint, including the resource. Example: `https://example-resource.azure.openai.com/`", "input_types": [ "Message" ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "multiline": true, - "name": "format_instructions", + "name": "azure_endpoint", "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, "tool_mode": false, "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "You are an AI that extracts structured JSON objects from unstructured text. Use a predefined schema with expected types (str, int, float, bool, dict). Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. Fill missing or ambiguous values with defaults: null for missing values. Remove exact duplicates but keep variations that have different field values. Always return valid JSON in the expected format, never throw errors. If multiple objects can be extracted, return them all in the structured format." + "value": "https://cog-54p2emd7pu2vu.openai.azure.com/" }, - "handle_parsing_errors": { - "_input_type": "BoolInput", + "code": { "advanced": true, - "display_name": "Handle Parse Errors", - "dynamic": false, - "info": "Should the Agent fix errors when reading user input for better processing?", - "input_types": [], + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", "list": false, - "list_add_label": "Add More", - "name": "handle_parsing_errors", + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "code", + "value": "from langchain_openai import AzureChatOpenAI\n\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import MessageTextInput\nfrom langflow.io import DropdownInput, IntInput, SecretStrInput, SliderInput\n\n\nclass AzureChatOpenAIComponent(LCModelComponent):\n display_name: str = \"Azure OpenAI\"\n description: str = \"Generate text using Azure OpenAI LLMs.\"\n documentation: str = \"https://python.langchain.com/docs/integrations/llms/azure_openai\"\n beta = False\n icon = \"Azure\"\n name = \"AzureOpenAIModel\"\n\n AZURE_OPENAI_API_VERSIONS = [\n \"2024-06-01\",\n \"2024-07-01-preview\",\n \"2024-08-01-preview\",\n \"2024-09-01-preview\",\n \"2024-10-01-preview\",\n \"2023-05-15\",\n \"2023-12-01-preview\",\n \"2024-02-15-preview\",\n \"2024-03-01-preview\",\n \"2024-12-01-preview\",\n \"2025-01-01-preview\",\n \"2025-02-01-preview\",\n ]\n\n inputs = [\n *LCModelComponent._base_inputs,\n MessageTextInput(\n name=\"azure_endpoint\",\n display_name=\"Azure Endpoint\",\n info=\"Your Azure endpoint, including the resource. Example: `https://example-resource.azure.openai.com/`\",\n required=True,\n ),\n MessageTextInput(name=\"azure_deployment\", display_name=\"Deployment Name\", required=True),\n SecretStrInput(name=\"api_key\", display_name=\"Azure Chat OpenAI API Key\", required=True),\n DropdownInput(\n name=\"api_version\",\n display_name=\"API Version\",\n options=sorted(AZURE_OPENAI_API_VERSIONS, reverse=True),\n value=next(\n (\n version\n for version in sorted(AZURE_OPENAI_API_VERSIONS, reverse=True)\n if not version.endswith(\"-preview\")\n ),\n AZURE_OPENAI_API_VERSIONS[0],\n ),\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.7,\n range_spec=RangeSpec(min=0, max=2, step=0.01),\n info=\"Controls randomness. Lower values are more deterministic, higher values are more creative.\",\n advanced=True,\n ),\n IntInput(\n name=\"max_tokens\",\n display_name=\"Max Tokens\",\n advanced=True,\n info=\"The maximum number of tokens to generate. Set to 0 for unlimited tokens.\",\n ),\n ]\n\n def build_model(self) -> LanguageModel: # type: ignore[type-var]\n azure_endpoint = self.azure_endpoint\n azure_deployment = self.azure_deployment\n api_version = self.api_version\n api_key = self.api_key\n temperature = self.temperature\n max_tokens = self.max_tokens\n stream = self.stream\n\n try:\n output = AzureChatOpenAI(\n azure_endpoint=azure_endpoint,\n azure_deployment=azure_deployment,\n api_version=api_version,\n api_key=api_key,\n temperature=temperature,\n max_tokens=max_tokens or None,\n streaming=stream,\n )\n except Exception as e:\n msg = f\"Could not connect to AzureOpenAI API: {e}\"\n raise ValueError(msg) from e\n\n return output\n" }, "input_value": { "_input_type": "MessageInput", "advanced": false, "display_name": "Input", "dynamic": false, - "info": "The input provided by the user for the agent to process.", + "info": "", "input_types": [ "Message" ], @@ -1473,56 +1313,18 @@ "required": false, "show": true, "title_case": false, - "tool_mode": true, + "tool_mode": false, "trace_as_input": true, "trace_as_metadata": true, "type": "str", "value": "" }, - "max_iterations": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Max Iterations", - "dynamic": false, - "info": "The maximum number of attempts the agent can make to complete its task before it stops.", - "input_types": [], - "list": false, - "list_add_label": "Add More", - "name": "max_iterations", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 15 - }, - "max_retries": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Max Retries", - "dynamic": false, - "info": "The maximum number of retries to make when generating.", - "list": false, - "list_add_label": "Add More", - "name": "max_retries", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 5 - }, "max_tokens": { "_input_type": "IntInput", "advanced": true, "display_name": "Max Tokens", "dynamic": false, "info": "The maximum number of tokens to generate. Set to 0 for unlimited tokens.", - "input_types": [], "list": false, "list_add_label": "Add More", "name": "max_tokens", @@ -1535,215 +1337,31 @@ "type": "int", "value": "" }, - "model_kwargs": { - "_input_type": "DictInput", - "advanced": true, - "display_name": "Model Kwargs", - "dynamic": false, - "info": "Additional keyword arguments to pass to the model.", - "list": false, - "list_add_label": "Add More", - "name": "model_kwargs", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "type": "dict", - "value": {} - }, - "model_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": true, - "dialog_inputs": {}, - "display_name": "Model Name", - "dynamic": false, - "external_options": {}, - "info": "To see the model names, first choose a provider. Then, enter your API key and click the refresh button next to the model name.", - "name": "model_name", - "options": [ - "gpt-4o-mini", - "gpt-4o", - "gpt-4.1", - "gpt-4.1-mini", - "gpt-4.1-nano", - "gpt-4-turbo", - "gpt-4-turbo-preview", - "gpt-4", - "gpt-3.5-turbo", - "gpt-5", - "gpt-5-mini", - "gpt-5-nano", - "gpt-5-chat-latest", - "o1", - "o3-mini", - "o3", - "o3-pro", - "o4-mini", - "o4-mini-high" - ], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": false, - "required": false, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "gpt-4o-mini" - }, - "n_messages": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Number of Chat History Messages", - "dynamic": false, - "info": "Number of chat history messages to retrieve.", - "input_types": [], - "list": false, - "list_add_label": "Add More", - "name": "n_messages", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 100 - }, - "openai_api_base": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "OpenAI API Base", - "dynamic": false, - "info": "The base URL of the OpenAI API. Defaults to https://api.openai.com/v1. You can change this to use other APIs like JinaChat, LocalAI and Prem.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "openai_api_base", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "output_schema": { - "_input_type": "TableInput", - "advanced": true, - "display_name": "Output Schema", - "dynamic": false, - "info": "Schema Validation: Define the structure and data types for structured output. No validation if no output schema.", - "input_types": [], - "is_list": true, - "list_add_label": "Add More", - "name": "output_schema", - "placeholder": "", - "required": false, - "show": true, - "table_icon": "Table", - "table_schema": { - "columns": [ - { - "default": "field", - "description": "Specify the name of the output field.", - "disable_edit": false, - "display_name": "Name", - "edit_mode": "inline", - "filterable": true, - "formatter": "text", - "hidden": false, - "name": "name", - "sortable": true, - "type": "str" - }, - { - "default": "description of field", - "description": "Describe the purpose of the output field.", - "disable_edit": false, - "display_name": "Description", - "edit_mode": "popover", - "filterable": true, - "formatter": "text", - "hidden": false, - "name": "description", - "sortable": true, - "type": "str" - }, - { - "default": "str", - "description": "Indicate the data type of the output field (e.g., str, int, float, bool, dict).", - "disable_edit": false, - "display_name": "Type", - "edit_mode": "inline", - "filterable": true, - "formatter": "text", - "hidden": false, - "name": "type", - "options": [ - "str", - "int", - "float", - "bool", - "dict" - ], - "sortable": true, - "type": "str" - }, - { - "default": false, - "description": "Set to True if this output field should be a list of the specified type.", - "disable_edit": false, - "display_name": "As List", - "edit_mode": "inline", - "filterable": true, - "formatter": "boolean", - "hidden": false, - "name": "multiple", - "sortable": true, - "type": "boolean" - } - ] - }, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "trigger_icon": "Table", - "trigger_text": "Open table", - "type": "table", - "value": [] - }, - "seed": { - "_input_type": "IntInput", + "stream": { + "_input_type": "BoolInput", "advanced": true, - "display_name": "Seed", + "display_name": "Stream", "dynamic": false, - "info": "The seed controls the reproducibility of the job.", + "info": "Stream the response from the model. Streaming works only in Chat.", "list": false, "list_add_label": "Add More", - "name": "seed", + "name": "stream", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "int", - "value": 1 + "type": "bool", + "value": false }, - "system_prompt": { + "system_message": { "_input_type": "MultilineInput", "advanced": false, "copy_field": false, - "display_name": "Agent Instructions", + "display_name": "System Message", "dynamic": false, - "info": "System Prompt: Initial instructions and context provided to guide the agent's behavior.", + "info": "System message to pass to the model.", "input_types": [ "Message" ], @@ -1751,7 +1369,7 @@ "list_add_label": "Add More", "load_from_db": false, "multiline": true, - "name": "system_prompt", + "name": "system_message", "placeholder": "", "required": false, "show": true, @@ -1760,15 +1378,14 @@ "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "You are a helpful assistant that can use tools to answer questions and perform tasks." + "value": "You are an AI assistant known for your accuracy and helpfulness. Carefully review the following clinical guidelines that are used to approve procedures for various medical conditions. Follow these specific instructions: 1. You must generate exactly **eight (8)** guidelines — no more, no less. 2. If there are **more than eight** guidelines in the context, **combine** and merge them logically so that the final output always contains **eight guidelines**. 3. If there are **fewer than eight** guidelines, **split** or expand them appropriately, ensuring that each original guideline is still represented, and the final count remains **eight guidelines**. 4. Each guideline MUST be **standalone** and **independent** and in a single line. 5. **Every guideline** provided in the context must be considered to answer the question. **Do not skip** or omit any. Ensure your final output always contains exactly **eight comprehensive guidelines**." }, "temperature": { "_input_type": "SliderInput", "advanced": true, "display_name": "Temperature", "dynamic": false, - "info": "", - "input_types": [], + "info": "Controls randomness. Lower values are more deterministic, higher values are more creative.", "max_label": "", "max_label_icon": "", "min_label": "", @@ -1790,97 +1407,150 @@ "tool_mode": false, "type": "slider", "value": 0.7 - }, - "timeout": { - "_input_type": "IntInput", + } + }, + "tool_mode": false + }, + "selected_output": "text_output", + "showNode": true, + "type": "AzureOpenAIModel" + }, + "dragging": false, + "id": "AzureOpenAIModel-TegVj", + "measured": { + "height": 616, + "width": 320 + }, + "position": { + "x": 2086.915062956044, + "y": 499.23978889642524 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "TextOutput-eIziv", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "category": "outputs", + "conditional_paths": [], + "custom_fields": {}, + "description": "Sends text output via API.", + "display_name": "Text Output", + "documentation": "", + "edited": false, + "field_order": [ + "input_value" + ], + "frozen": false, + "icon": "type", + "key": "TextOutput", + "legacy": false, + "lf_version": "1.4.3", + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Output Text", + "group_outputs": false, + "method": "text_response", + "name": "text", + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "score": 0.003169567463043492, + "template": { + "_type": "Component", + "code": { "advanced": true, - "display_name": "Timeout", - "dynamic": false, - "info": "The timeout for requests to OpenAI completion API.", + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", "list": false, - "list_add_label": "Add More", - "name": "timeout", + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 700 + "type": "code", + "value": "from langflow.base.io.text import TextComponent\nfrom langflow.io import MultilineInput, Output\nfrom langflow.schema.message import Message\n\n\nclass TextOutputComponent(TextComponent):\n display_name = \"Text Output\"\n description = \"Sends text output via API.\"\n documentation: str = \"https://docs.langflow.org/components-io#text-output\"\n icon = \"type\"\n name = \"TextOutput\"\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Text to be passed as output.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n message = Message(\n text=self.input_value,\n )\n self.status = self.input_value\n return message\n" }, - "tools": { - "_input_type": "HandleInput", + "input_value": { + "_input_type": "MultilineInput", "advanced": false, - "display_name": "Tools", + "copy_field": false, + "display_name": "Inputs", "dynamic": false, - "info": "These are the tools that the agent can use to help with tasks.", + "info": "Text to be passed as output.", "input_types": [ - "Tool" + "Message" ], - "list": true, - "list_add_label": "Add More", - "name": "tools", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "other", - "value": "" - }, - "verbose": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Verbose", - "dynamic": false, - "info": "", - "input_types": [], "list": false, "list_add_label": "Add More", - "name": "verbose", + "load_from_db": false, + "multiline": true, + "name": "input_value", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "str", + "value": "" } }, "tool_mode": false }, "showNode": true, - "type": "Agent" + "type": "TextOutput" }, "dragging": false, - "id": "Agent-wwvo0", + "id": "TextOutput-eIziv", "measured": { - "height": 756, + "height": 204, "width": 320 }, "position": { - "x": 2711.1115025363047, - "y": 144.63197769914373 + "x": 2466.6328831880537, + "y": 926.6214851632049 }, "selected": false, "type": "genericNode" } ], "viewport": { - "x": -336.3177830698287, - "y": 108.75929242575396, - "zoom": 0.660770968079748 + "x": -791.2065297375518, + "y": -80.52160803113443, + "zoom": 0.5848688087039475 } }, - "description": "Locates and retrieves relevant clinical guidelines from policy documents for checking medical necessity ensuring quick-access to up-to-date criteria for informed clinical decision-making", + "description": "Locates and retrieves relevant clinical guidelines from policy documents for checking medical necessity ensuring quick-access to up-to-date criteria for informed clinical decision-making.", "endpoint_name": null, - "id": "e6af5859-3d9b-43b4-9e3f-4cd068223eef", + "id": "46a44581-daec-486c-b7b4-44c7bcebb355", "is_component": false, "last_tested_version": "1.6.3", - "name": "Auth Guideline ", + "name": "Auth Guidelines", "tags": [ - "prior-auth" + "prior-auth", + "chart-review" ] } \ No newline at end of file diff --git a/src/backend/base/langflow/initial_setup/starter_projects/CPT Code Agent.json b/src/backend/base/langflow/initial_setup/starter_projects/CPT Code Agent.json index b6fb35555e8c..bdc30d152c46 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/CPT Code Agent.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/CPT Code Agent.json @@ -6,197 +6,189 @@ "className": "", "data": { "sourceHandle": { - "dataType": "ParseData", - "id": "ParseData-aNGFz", - "name": "text", + "dataType": "BlobStorage", + "id": "BlobStorage-SSNnX", + "name": "file_path", "output_types": [ - "Message" + "Data" ] }, "targetHandle": { - "fieldName": "search_query", - "id": "CPTCode-9u6BH", + "fieldName": "url", + "id": "AzureDocumentIntelligence-sIbd2", "inputTypes": [ - "Message" + "str", + "Data", + "Message", + "list" ], - "type": "str" + "type": "other" } }, - "id": "reactflow__edge-ParseData-aNGFz{œdataTypeœ:œParseDataœ,œidœ:œParseData-aNGFzœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-CPTCode-9u6BH{œfieldNameœ:œsearch_queryœ,œidœ:œCPTCode-9u6BHœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "xy-edge__BlobStorage-SSNnX{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-SSNnXœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}-AzureDocumentIntelligence-sIbd2{œfieldNameœ:œurlœ,œidœ:œAzureDocumentIntelligence-sIbd2œ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}", "selected": false, - "source": "ParseData-aNGFz", - "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-aNGFzœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", - "target": "CPTCode-9u6BH", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œCPTCode-9u6BHœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "BlobStorage-SSNnX", + "sourceHandle": "{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-SSNnXœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}", + "target": "AzureDocumentIntelligence-sIbd2", + "targetHandle": "{œfieldNameœ:œurlœ,œidœ:œAzureDocumentIntelligence-sIbd2œ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "CPTCode", - "id": "CPTCode-9u6BH", - "name": "prediction", + "dataType": "AzureDocumentIntelligence", + "id": "AzureDocumentIntelligence-sIbd2", + "name": "structured_data", "output_types": [ - "Data" + "DataFrame" ] }, "targetHandle": { - "fieldName": "data", - "id": "ParseData-FJc5T", + "fieldName": "input_data", + "id": "ParserComponent-vGXJE", "inputTypes": [ + "DataFrame", "Data" ], "type": "other" } }, - "id": "reactflow__edge-CPTCode-9u6BH{œdataTypeœ:œCPTCodeœ,œidœ:œCPTCode-9u6BHœ,œnameœ:œpredictionœ,œoutput_typesœ:[œDataœ]}-ParseData-FJc5T{œfieldNameœ:œdataœ,œidœ:œParseData-FJc5Tœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "id": "xy-edge__AzureDocumentIntelligence-sIbd2{œdataTypeœ:œAzureDocumentIntelligenceœ,œidœ:œAzureDocumentIntelligence-sIbd2œ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataFrameœ]}-ParserComponent-vGXJE{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-vGXJEœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "selected": false, - "source": "CPTCode-9u6BH", - "sourceHandle": "{œdataTypeœ: œCPTCodeœ, œidœ: œCPTCode-9u6BHœ, œnameœ: œpredictionœ, œoutput_typesœ: [œDataœ]}", - "target": "ParseData-FJc5T", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-FJc5Tœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "source": "AzureDocumentIntelligence-sIbd2", + "sourceHandle": "{œdataTypeœ:œAzureDocumentIntelligenceœ,œidœ:œAzureDocumentIntelligence-sIbd2œ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataFrameœ]}", + "target": "ParserComponent-vGXJE", + "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-vGXJEœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "BlobStorage", - "id": "BlobStorage-x6Wd4", - "name": "file_path", + "dataType": "ParserComponent", + "id": "ParserComponent-vGXJE", + "name": "parsed_text", "output_types": [ - "Data" + "Message" ] }, "targetHandle": { - "fieldName": "url", - "id": "azure_ocr-kDLMl", + "fieldName": "search_query", + "id": "AutonomizeModel-s9Ad7", "inputTypes": [ - "str", - "Data", - "Message", - "list" + "Message" ], - "type": "other" + "type": "str" } }, - "id": "xy-edge__BlobStorage-x6Wd4{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-x6Wd4œ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}-azure_ocr-kDLMl{œfieldNameœ:œurlœ,œidœ:œazure_ocr-kDLMlœ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}", + "id": "xy-edge__ParserComponent-vGXJE{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-vGXJEœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-AutonomizeModel-s9Ad7{œfieldNameœ:œsearch_queryœ,œidœ:œAutonomizeModel-s9Ad7œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "BlobStorage-x6Wd4", - "sourceHandle": "{œdataTypeœ: œBlobStorageœ, œidœ: œBlobStorage-x6Wd4œ, œnameœ: œfile_pathœ, œoutput_typesœ: [œDataœ]}", - "target": "azure_ocr-kDLMl", - "targetHandle": "{œfieldNameœ: œurlœ, œidœ: œazure_ocr-kDLMlœ, œinputTypesœ: [œstrœ, œDataœ, œMessageœ, œlistœ], œtypeœ: œotherœ}" + "source": "ParserComponent-vGXJE", + "sourceHandle": "{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-vGXJEœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "target": "AutonomizeModel-s9Ad7", + "targetHandle": "{œfieldNameœ:œsearch_queryœ,œidœ:œAutonomizeModel-s9Ad7œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "azure_ocr", - "id": "azure_ocr-kDLMl", - "name": "structured_data", + "dataType": "AutonomizeModel", + "id": "AutonomizeModel-s9Ad7", + "name": "prediction", "output_types": [ "Data" ] }, "targetHandle": { - "fieldName": "data", - "id": "ParseData-aNGFz", + "fieldName": "input_data", + "id": "ParserComponent-guFKQ", "inputTypes": [ + "DataFrame", "Data" ], "type": "other" } }, - "id": "xy-edge__azure_ocr-kDLMl{œdataTypeœ:œazure_ocrœ,œidœ:œazure_ocr-kDLMlœ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataœ]}-ParseData-aNGFz{œfieldNameœ:œdataœ,œidœ:œParseData-aNGFzœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "id": "xy-edge__AutonomizeModel-s9Ad7{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-s9Ad7œ,œnameœ:œpredictionœ,œoutput_typesœ:[œDataœ]}-ParserComponent-guFKQ{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-guFKQœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "selected": false, - "source": "azure_ocr-kDLMl", - "sourceHandle": "{œdataTypeœ: œazure_ocrœ, œidœ: œazure_ocr-kDLMlœ, œnameœ: œstructured_dataœ, œoutput_typesœ: [œDataœ]}", - "target": "ParseData-aNGFz", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-aNGFzœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "source": "AutonomizeModel-s9Ad7", + "sourceHandle": "{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-s9Ad7œ,œnameœ:œpredictionœ,œoutput_typesœ:[œDataœ]}", + "target": "ParserComponent-guFKQ", + "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-guFKQœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "ParseData", - "id": "ParseData-FJc5T", - "name": "data_object_list", - "output_types": [] + "dataType": "ParserComponent", + "id": "ParserComponent-guFKQ", + "name": "parsed_text", + "output_types": [ + "Message" + ] }, "targetHandle": { - "fieldName": "data", - "id": "JSONOutput-cOQuo", + "fieldName": "input_value", + "id": "ChatOutput-l6Kws", "inputTypes": [ - "Data" + "Data", + "DataFrame", + "Message" ], "type": "other" } }, - "id": "xy-edge__ParseData-FJc5T{œdataTypeœ:œParseDataœ,œidœ:œParseData-FJc5Tœ,œnameœ:œdata_object_listœ,œoutput_typesœ:[œDataœ]}-JSONOutput-cOQuo{œfieldNameœ:œdataœ,œidœ:œJSONOutput-cOQuoœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "id": "xy-edge__ParserComponent-guFKQ{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-guFKQœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-l6Kws{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-l6Kwsœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", "selected": false, - "source": "ParseData-FJc5T", - "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-FJc5Tœ, œnameœ: œdata_object_listœ, œoutput_typesœ: []}", - "target": "JSONOutput-cOQuo", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œJSONOutput-cOQuoœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "source": "ParserComponent-guFKQ", + "sourceHandle": "{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-guFKQœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "target": "ChatOutput-l6Kws", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-l6Kwsœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" } ], "nodes": [ { "data": { - "id": "ParseData-aNGFz", + "id": "BlobStorage-SSNnX", "node": { "base_classes": [ - "Data", - "Dict", - "Message" + "Data" ], "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Convert Data objects into Messages using any {field_name} from input data.", - "display_name": "Parse Data", - "documentation": "", + "description": "Load files from Azure Blob Storage", + "display_name": "Blob Storage", + "documentation": "http://docs.langflow.org/components/storage", "edited": false, "field_order": [ - "data", - "template", - "sep" + "storage_account", + "container_name", + "file_name", + "return_all_files" ], "frozen": false, - "icon": "message-square", - "legacy": true, - "lf_version": "1.4.3", - "metadata": { - "legacy_name": "Parse Data" - }, + "icon": "Autonomize", + "last_updated": "2025-10-28T19:13:13.209Z", + "legacy": false, + "lf_version": "1.6.3", + "metadata": {}, "minimized": false, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Message", - "group_outputs": false, - "method": "parse_data", - "name": "text", - "selected": "Message", - "tool_mode": true, - "types": [ - "Message" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "Data List", + "display_name": "File Path", "group_outputs": false, - "method": "parse_data_as_list", - "name": "data_list", - "selected": "Dict", + "method": "get_file_paths", + "name": "file_path", + "options": null, + "required_inputs": null, + "selected": "Data", "tool_mode": true, "types": [ "Data" @@ -223,152 +215,304 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import data_to_text, data_to_text_list\nfrom langflow.io import DataInput, MultilineInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.message import Message\n\n\nclass ParseDataComponent(Component):\n display_name = \"Data to Message\"\n description = \"Convert Data objects into Messages using any {field_name} from input data.\"\n icon = \"message-square\"\n name = \"ParseData\"\n legacy = True\n replacement = [\"processing.DataOperations\", \"processing.TypeConverterComponent\"]\n metadata = {\n \"legacy_name\": \"Parse Data\",\n }\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to text.\",\n is_list=True,\n required=True,\n ),\n MultilineInput(\n name=\"template\",\n display_name=\"Template\",\n info=\"The template to use for formatting the data. \"\n \"It can contain the keys {text}, {data} or any other key in the Data.\",\n value=\"{text}\",\n required=True,\n ),\n StrInput(name=\"sep\", display_name=\"Separator\", advanced=True, value=\"\\n\"),\n ]\n\n outputs = [\n Output(\n display_name=\"Message\",\n name=\"text\",\n info=\"Data as a single Message, with each input Data separated by Separator\",\n method=\"parse_data\",\n ),\n Output(\n display_name=\"Data List\",\n name=\"data_list\",\n info=\"Data as a list of new Data, each having `text` formatted by Template\",\n method=\"parse_data_as_list\",\n ),\n ]\n\n def _clean_args(self) -> tuple[list[Data], str, str]:\n data = self.data if isinstance(self.data, list) else [self.data]\n template = self.template\n sep = self.sep\n return data, template, sep\n\n def parse_data(self) -> Message:\n data, template, sep = self._clean_args()\n result_string = data_to_text(template, data, sep)\n self.status = result_string\n return Message(text=result_string)\n\n def parse_data_as_list(self) -> list[Data]:\n data, template, _ = self._clean_args()\n text_list, data_list = data_to_text_list(template, data)\n for item, text in zip(data_list, text_list, strict=True):\n item.set_text(text)\n self.status = data_list\n return data_list\n" + "value": "\"\"\"Blob Storage Component for loading files from Azure Blob Storage.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.services.deps import get_flexstore_service\nfrom loguru import logger\n\n\nclass BlobStorageComponent(Component):\n display_name = \"Blob Storage\"\n category: str = \"input_output\"\n description = \"Load files from Azure Blob Storage\"\n documentation = \"http://docs.langflow.org/components/storage\"\n icon = \"Autonomize\"\n name = \"BlobStorage\"\n\n # Match the property name expected by FileComponent\n FILE_PATH_FIELD = \"file_path\"\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._container_list: list[str] = []\n self._file_list: list[str] = []\n\n inputs = [\n StrInput(\n name=\"storage_account\",\n display_name=\"Storage Account\",\n required=False,\n info=\"Storage Account name\",\n advanced=True,\n ),\n DropdownInput(\n name=\"container_name\",\n display_name=\"Container\",\n info=\"Select a container from the storage account\",\n required=True,\n refresh_button=True,\n ),\n DropdownInput(\n name=\"file_name\",\n display_name=\"File\",\n info=\"Select a file from the container\",\n required=True,\n refresh_button=True,\n ),\n BoolInput(\n name=\"return_all_files\",\n display_name=\"Return All Files\",\n info=\"If true and no specific file is selected, returns all files in the container\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"file_path\", # Match the property name expected by FileComponent\n display_name=\"File Path\",\n method=\"get_file_paths\",\n ),\n ]\n\n async def update_build_config(\n self, build_config: dict, field_value: Any, field_name: str | None = None\n ):\n \"\"\"Update the build configuration based on field changes.\"\"\"\n logger.info(f\"update_build_config called with field_name: {field_name}\")\n\n storage_account = getattr(self, \"storage_account\", None)\n container_name = getattr(self, \"container_name\", None)\n\n if field_name == \"container_name\":\n try:\n # Load the container options when the field is refreshed\n service = get_flexstore_service()\n self._container_list = await service.get_containers(storage_account)\n\n build_config[\"container_name\"][\"options\"] = self._container_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating container list: {e!s}\")\n raise\n\n elif field_name == \"file_name\" and container_name:\n try:\n # Load the file options when the field is refreshed\n service = get_flexstore_service()\n self._file_list = await service.get_files(\n storage_account, container_name\n )\n\n build_config[\"file_name\"][\"options\"] = self._file_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating file list: {e!s}\")\n raise\n\n return build_config\n\n async def get_file_paths(self) -> list[Data]:\n \"\"\"Get file paths for the FileComponent to process.\"\"\"\n try:\n if not self.container_name:\n logger.warning(\"Container name is required.\")\n return []\n\n service = get_flexstore_service()\n file_paths = []\n\n # If a specific file is selected\n if self.file_name:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, self.file_name\n )\n if signed_url:\n file_paths = [Data(data={self.FILE_PATH_FIELD: signed_url})]\n # If no specific file is selected and return_all_files is True\n elif self.return_all_files:\n files = await service.get_files(\n self.storage_account, self.container_name\n )\n for file in files:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, file\n )\n if signed_url:\n file_paths.append(Data(data={self.FILE_PATH_FIELD: signed_url}))\n\n if file_paths:\n self.status = file_paths\n logger.info(f\"Generated {len(file_paths)} file paths\")\n for path in file_paths:\n logger.debug(f\"File path: {path.data.get(self.FILE_PATH_FIELD)}\")\n else:\n logger.warning(\"No file paths generated\")\n\n return file_paths\n\n except Exception as e:\n logger.error(f\"Error in get_file_paths: {e!s}\")\n return []\n" }, - "data": { - "_input_type": "DataInput", + "container_name": { + "_input_type": "DropdownInput", "advanced": false, - "display_name": "Data", + "combobox": false, + "dialog_inputs": {}, + "display_name": "Container", "dynamic": false, - "info": "The data to convert to text.", - "input_types": [ - "Data" + "external_options": {}, + "info": "Select a container from the storage account", + "name": "container_name", + "options": [ + "a-and-g-case-summary", + "a-and-g-case-summary-for-client", + "a-and-g-input", + "a-and-g-ocr-cache", + "a-and-g-ocr-post-processed-cache", + "a-and-g-page-images", + "a-and-g-reimagined", + "ai-studio-v2", + "aircare-dev-4002", + "aml-environment-image-build", + "ang-images", + "anywhere-200-files-input", + "anywhere-200-files-output", + "anywhere-input-docs-test", + "anywhere-output-docs-test", + "argo-flow-artifacts", + "atena", + "athena-data", + "autonomize-mlflow-artifacts", + "azure-webjobs-hosts", + "azure-webjobs-secrets", + "azureml", + "azureml-blobstore-3894b54e-0ee2-4e07-9b73-c3b30dc36b53", + "azureml-environments", + "azureml-metrics", + "backup-of-aws-instance", + "bcbs-ma-interqual-medical-policy", + "bcbs-medical-policy", + "benefit-accumulator-guidelines", + "benefit-check-eoc", + "benefit-eoc-guidelines", + "benefit-eoc-output", + "benefit-eoc-payload", + "bluecross-ca", + "carelon-guidelines", + "carelon-guidelines-v2", + "claims-docs", + "claims-qnext-response", + "cllm-v2-data", + "cms", + "correspondance-examples", + "datavant-storage-container", + "eoc-documents", + "etf-images", + "evicore-guidelines", + "fax-documents", + "fax-images", + "fax-images-2", + "fax-insights", + "fax-ocr-cache", + "fax-processor-validation-datasets", + "genesis-container", + "genesis-correspondence-automation-copilot", + "genesis-correspondence-bulk-ingestion-test", + "genesis-cph-demo-v2", + "genesis-dev-test-3012", + "genesis-dev-v2", + "genesis-platform-3010", + "genesis-platform-anywhere-prod", + "genesis-platform-cigna-dev", + "genesis-platform-demo", + "genesis-platform-demo-v2", + "genesis-platform-dev", + "genesis-platform-molina-uat", + "genesis-platform-qa", + "genesis-platform-v2-lab", + "hedis-page-images", + "hedis-page-ocr", + "indexbackup", + "insights-logs-auditevent", + "insights-metrics-pt1m", + "issue-test-1", + "job-test", + "k-hub-container", + "knowledgehubembeddings", + "load-testing-files-hedis", + "mail-images", + "mlflow", + "mlflow-dev-v2", + "mlflow-integration", + "mlflow-mssql", + "mlflow-qa", + "mlflowmssql", + "mlserver-artifacts", + "mlworkspace-backup", + "model-artifacts", + "modelcards", + "modelhub", + "modelhub-demo-v2", + "modelhub-v2-lab", + "models", + "molina-refactor-temporary", + "molina-refactor-test", + "mosaic-models", + "mosaic-provider-contracts", + "mosaic-provider-contracts-temp", + "mosaic-testing", + "ner-container", + "nestedcontainer", + "pcp-docs", + "pcp-extraction-docs", + "pcp-ocr-cache", + "pharmacy-auth", + "prior-auth", + "prior-authorization", + "projectx-files", + "projectx-temp-files", + "revisions", + "shahabas-mlflow-artifacts", + "snapshots", + "snapshotzips", + "spog-output", + "spog-qnext", + "string", + "temp-directory-genesis-studio", + "temp-studio", + "temp-studio-v2", + "temporal-poc", + "test-cms", + "test-storage-container", + "test1", + "tester-fax", + "umk2dev", + "weaviate-backups" ], - "list": true, - "list_add_label": "Add More", - "name": "data", + "options_metadata": [], "placeholder": "", + "refresh_button": true, "required": true, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "other", - "value": "" + "type": "str", + "value": "genesis-container" }, - "sep": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Separator", + "file_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "File", "dynamic": false, - "info": "", + "external_options": {}, + "info": "Select a file from the container", + "name": "file_name", + "options": [ + "0108_Subject199-1.pdf", + "0108_Subject199.pdf", + "2222222_Subject2.pdf", + "24-078776909.pdf", + "457889_Jane_foster.pdf", + "93451_Matt_Damon.pdf", + "PriorAuthSample1 (1).pdf", + "PriorAuthSample3 (1).pdf", + "PriorAuthSample5_1 (1).pdf", + "Texas-Contract-Dates.pdf", + "dfw.pdf", + "pre-authorization-sample.pdf", + "prior_auth.png" + ], + "options_metadata": [], + "placeholder": "", + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "24-078776909.pdf" + }, + "return_all_files": { + "_input_type": "BoolInput", + "advanced": false, + "display_name": "Return All Files", + "dynamic": false, + "info": "If true and no specific file is selected, returns all files in the container", "list": false, "list_add_label": "Add More", - "load_from_db": false, - "name": "sep", + "name": "return_all_files", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "\n" + "type": "bool", + "value": true }, - "template": { - "_input_type": "MultilineInput", - "advanced": false, - "copy_field": false, - "display_name": "Template", + "storage_account": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Storage Account", "dynamic": false, - "info": "The template to use for formatting the data. It can contain the keys {text}, {data} or any other key in the Data.", - "input_types": [ - "Message" - ], + "info": "Storage Account name", "list": false, "list_add_label": "Add More", "load_from_db": false, - "multiline": true, - "name": "template", + "name": "storage_account", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "{text}" + "value": "" } }, "tool_mode": false }, "showNode": true, - "type": "ParseData" + "type": "BlobStorage" }, - "id": "ParseData-aNGFz", + "dragging": false, + "id": "BlobStorage-SSNnX", "measured": { - "height": 397, + "height": 329, "width": 320 }, "position": { - "x": -2493.2708490034456, - "y": -2051.654234234234 + "x": 4006.874939841522, + "y": 81.76916067574258 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "ParseData-FJc5T", + "id": "AzureDocumentIntelligence-sIbd2", "node": { "base_classes": [ - "Data", - "Dict", - "Message" + "DataFrame" ], "beta": false, + "category": "models", "conditional_paths": [], "custom_fields": {}, - "description": "Convert Data objects into Messages using any {field_name} from input data.", - "display_name": "Parse Data", - "documentation": "", + "description": "Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis", + "display_name": "Azure Document Intelligence", + "documentation": "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/", "edited": false, "field_order": [ - "data", - "template", - "sep" + "url", + "file_path", + "silent_errors", + "delete_server_file_after_processing", + "ignore_unsupported_extensions", + "ignore_unspecified_files", + "model_type", + "extract_tables", + "include_confidence", + "use_multithreading", + "concurrency_multithreading" ], "frozen": false, - "icon": "message-square", - "legacy": true, - "metadata": { - "legacy_name": "Parse Data" - }, + "icon": "Azure", + "key": "AzureDocumentIntelligence", + "legacy": false, + "lf_version": "1.6.3", + "metadata": {}, "minimized": false, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Message", - "group_outputs": false, - "method": "parse_data", - "name": "text", - "selected": "Message", - "tool_mode": true, - "types": [ - "Message" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "Data List", + "display_name": "Structured Data", "group_outputs": false, - "method": "parse_data_as_list", - "name": "data_list", - "selected": "Dict", + "method": "load_files", + "name": "structured_data", + "selected": "DataFrame", "tool_mode": true, "types": [ - "Data" + "DataFrame" ], "value": "__UNDEFINED__" } ], "pinned": false, + "priority": 3, + "score": 0.10489765225226892, "template": { "_type": "Component", "code": { @@ -387,114 +531,266 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import data_to_text, data_to_text_list\nfrom langflow.io import DataInput, MultilineInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.message import Message\n\n\nclass ParseDataComponent(Component):\n display_name = \"Data to Message\"\n description = \"Convert Data objects into Messages using any {field_name} from input data.\"\n icon = \"message-square\"\n name = \"ParseData\"\n legacy = True\n replacement = [\"processing.DataOperations\", \"processing.TypeConverterComponent\"]\n metadata = {\n \"legacy_name\": \"Parse Data\",\n }\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to text.\",\n is_list=True,\n required=True,\n ),\n MultilineInput(\n name=\"template\",\n display_name=\"Template\",\n info=\"The template to use for formatting the data. \"\n \"It can contain the keys {text}, {data} or any other key in the Data.\",\n value=\"{text}\",\n required=True,\n ),\n StrInput(name=\"sep\", display_name=\"Separator\", advanced=True, value=\"\\n\"),\n ]\n\n outputs = [\n Output(\n display_name=\"Message\",\n name=\"text\",\n info=\"Data as a single Message, with each input Data separated by Separator\",\n method=\"parse_data\",\n ),\n Output(\n display_name=\"Data List\",\n name=\"data_list\",\n info=\"Data as a list of new Data, each having `text` formatted by Template\",\n method=\"parse_data_as_list\",\n ),\n ]\n\n def _clean_args(self) -> tuple[list[Data], str, str]:\n data = self.data if isinstance(self.data, list) else [self.data]\n template = self.template\n sep = self.sep\n return data, template, sep\n\n def parse_data(self) -> Message:\n data, template, sep = self._clean_args()\n result_string = data_to_text(template, data, sep)\n self.status = result_string\n return Message(text=result_string)\n\n def parse_data_as_list(self) -> list[Data]:\n data, template, _ = self._clean_args()\n text_list, data_list = data_to_text_list(template, data)\n for item, text in zip(data_list, text_list, strict=True):\n item.set_text(text)\n self.status = data_list\n return data_list\n" + "value": "\"\"\"Azure Document Intelligence Component - Form recognition and document processing.\"\"\"\n\nimport asyncio\nimport concurrent.futures\nimport mimetypes\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom urllib.parse import unquote, urlparse\n\nimport aiohttp\nimport requests\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output\nfrom langflow.schema.data import Data\nfrom loguru import logger\n\n\nclass AzureDocumentIntelligenceComponent(BaseFileComponent):\n \"\"\"Component for Azure Document Intelligence - advanced document processing and form recognition.\"\"\"\n\n display_name: str = \"Azure Document Intelligence\"\n description: str = \"Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis\"\n documentation: str = \"https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/\"\n icon: str = \"Azure\"\n name: str = \"AzureDocumentIntelligence\"\n category: str = \"models\"\n priority: int = 3 # High priority for document processing\n\n VALID_EXTENSIONS = [\"pdf\", \"jpg\", \"jpeg\", \"png\", \"bmp\", \"tiff\", \"tif\"]\n\n inputs = [\n HandleInput(\n name=\"url\",\n display_name=\"URL\",\n info=\"URL to the document to process\",\n input_types=[\"str\", \"Data\", \"Message\", \"list\"],\n required=False,\n ),\n # Include only the HandleInput and BoolInputs from base_inputs\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n DropdownInput(\n name=\"model_type\",\n display_name=\"Model Type\",\n options=[\"prebuilt-document\", \"prebuilt-read\", \"prebuilt-layout\"],\n value=\"prebuilt-document\",\n info=\"Choose the Form Recognizer model to use\",\n ),\n BoolInput(\n name=\"extract_tables\",\n display_name=\"Extract Tables\",\n value=True,\n info=\"Extract and format tables from the document\",\n ),\n BoolInput(\n name=\"include_confidence\",\n display_name=\"Include Confidence Scores\",\n value=False,\n advanced=True,\n info=\"Include confidence scores in the extracted text\",\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"Use Concurrent Processing\",\n value=True,\n info=\"Enable concurrent processing of multiple files\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"Number of files to process concurrently\",\n value=2,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Structured Data\", name=\"structured_data\", method=\"load_files\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n self._text_content = \"\"\n\n def get_text_content(self) -> str:\n \"\"\"Return the concatenated text content from all processed pages.\"\"\"\n return self._text_content\n\n def _extract_filename_from_url(self, url: str) -> str:\n \"\"\"Extract filename from URL or generate a default one.\"\"\"\n try:\n logger.debug(f\"Extracting filename from URL: {url}\")\n parsed_url = urlparse(url)\n path = unquote(parsed_url.path)\n filename = os.path.basename(path)\n\n if filename and \".\" in filename:\n logger.debug(f\"Found filename in URL path: {filename}\")\n return filename\n\n response = requests.head(url, allow_redirects=True)\n if \"content-disposition\" in response.headers:\n content_disp = response.headers[\"content-disposition\"]\n if \"filename=\" in content_disp:\n filename = content_disp.split(\"filename=\")[1].strip(\"\\\"'\")\n logger.debug(f\"Found filename in content-disposition: {filename}\")\n return filename\n\n if \"content-type\" in response.headers:\n ext = mimetypes.guess_extension(response.headers[\"content-type\"])\n if ext:\n filename = f\"downloaded{ext}\"\n logger.debug(f\"Generated filename from content-type: {filename}\")\n return filename\n\n logger.debug(\"Using default filename: downloaded.pdf\")\n return \"downloaded.pdf\"\n except Exception as e:\n logger.error(f\"Error extracting filename from URL: {e!s}\")\n return \"downloaded.pdf\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n logger.debug(f\"Attempting to download file from URL: {url}\")\n filename = self._extract_filename_from_url(url)\n local_path = os.path.join(self.temp_dir, filename)\n logger.debug(f\"Local path for download: {local_path}\")\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n def _extract_url_from_input(self, input_data) -> str | None:\n \"\"\"Extract URL string from various input types.\"\"\"\n logger.debug(f\"Extracting URL from input data type: {type(input_data)}\")\n\n # Handle list of Data objects (from blob storage)\n if isinstance(input_data, list):\n logger.debug(f\"Processing list input with {len(input_data)} items\")\n if input_data and isinstance(input_data[0], Data):\n url = input_data[0].data.get(\"file_path\")\n logger.debug(f\"Extracted URL from first Data object in list: {url}\")\n return url\n return None\n\n if isinstance(input_data, str):\n logger.debug(f\"Input is string: {input_data}\")\n return input_data\n elif isinstance(input_data, Data):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from Data object: {url}\")\n return url\n elif hasattr(input_data, \"text\"):\n logger.debug(f\"Extracted URL from text attribute: {input_data.text}\")\n return input_data.text\n elif hasattr(input_data, \"data\"):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from data attribute: {url}\")\n return url\n logger.debug(\"No URL found in input data\")\n return None\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths.\"\"\"\n resolved_files = []\n logger.debug(\"Starting path validation and resolution\")\n\n # Handle URL input if provided\n if hasattr(self, \"url\") and self.url:\n try:\n logger.debug(f\"Processing URL input: {self.url}\")\n # Extract URL from different input types\n url = self._extract_url_from_input(self.url)\n if not url:\n logger.warning(\"No valid URL found in input\")\n return resolved_files\n\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(url)\n )\n finally:\n loop.close()\n\n if local_path:\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": url,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n except Exception as e:\n logger.error(f\"Error processing URL {url}: {e!s}\")\n if not self.silent_errors:\n raise\n\n # Handle file_path input\n file_path = self._file_path_as_list()\n logger.debug(f\"Processing file_path input: {file_path}\")\n for obj in file_path:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n logger.debug(f\"Processing server file path: {server_file_path}\")\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Check if it's a URL\n if isinstance(server_file_path, str) and server_file_path.startswith(\n (\"http://\", \"https://\")\n ):\n logger.debug(f\"Processing URL from file_path: {server_file_path}\")\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(server_file_path)\n )\n finally:\n loop.close()\n\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": server_file_path,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(server_file_path)))\n logger.debug(f\"Resolved local file path: {resolved_path}\")\n if not resolved_path.exists():\n msg = f\"File not found: {server_file_path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {server_file_path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n logger.debug(f\"Resolved {len(resolved_files)} files\")\n return resolved_files\n\n async def process_file(\n self, file_path: str, *, silent_errors: bool = False\n ) -> tuple[Data, str]:\n \"\"\"Process a single file using the OCR service.\"\"\"\n try:\n from langflow.services.deps import get_document_intelligence_service\n \n # Create OCR service directly\n ocr_service = get_document_intelligence_service()\n\n with open(file_path, \"rb\") as file:\n file_content = file.read()\n\n extracted_content, plain_text = await ocr_service.process_document(\n file_content=file_content,\n model_type=self.model_type,\n include_confidence=self.include_confidence,\n extract_tables=self.extract_tables,\n )\n\n structured_data = Data(\n text=plain_text,\n data={\n self.SERVER_FILE_PATH_FIELDNAME: str(file_path),\n \"result\": extracted_content,\n },\n )\n\n return structured_data, plain_text\n\n except Exception as e:\n logger.error(f\"Error processing file {file_path}: {e!s}\")\n if not silent_errors:\n raise\n return None, \"\"\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process multiple files with concurrent processing.\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n concurrency = (\n 1\n if not self.use_multithreading\n else max(1, self.concurrency_multithreading)\n )\n file_count = len(file_list)\n\n logger.info(f\"Processing {file_count} files with concurrency: {concurrency}\")\n\n all_plain_text = []\n processed_data = []\n\n if concurrency > 1 and file_count > 1:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n with concurrent.futures.ThreadPoolExecutor(\n max_workers=concurrency\n ) as executor:\n future_to_file = {\n executor.submit(\n lambda path: loop.run_until_complete(\n self.process_file(\n str(path), silent_errors=self.silent_errors\n )\n ),\n file.path,\n ): file\n for file in file_list\n }\n for future in concurrent.futures.as_completed(future_to_file):\n try:\n structured_data, plain_text = future.result()\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error in concurrent processing: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n else:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n for file in file_list:\n try:\n structured_data, plain_text = loop.run_until_complete(\n self.process_file(\n str(file.path), silent_errors=self.silent_errors\n )\n )\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n\n # Store concatenated text content\n self._text_content = \"\\n\\n=== NEW DOCUMENT ===\\n\\n\".join(all_plain_text)\n\n return self.rollup_data(file_list, processed_data)\n\n def __del__(self):\n \"\"\"Cleanup temporary files and directory.\"\"\"\n try:\n if hasattr(self, \"temp_dir\") and os.path.exists(self.temp_dir):\n # Remove downloaded files\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.unlink(file_path)\n # Remove the temporary directory\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" }, - "data": { - "_input_type": "DataInput", - "advanced": false, - "display_name": "Data", + "concurrency_multithreading": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Processing Concurrency", "dynamic": false, - "info": "The data to convert to text.", - "input_types": [ - "Data" - ], - "list": true, + "info": "Number of files to process concurrently", + "list": false, "list_add_label": "Add More", - "name": "data", + "name": "concurrency_multithreading", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "other", - "value": "" + "type": "int", + "value": 2 }, - "sep": { - "_input_type": "StrInput", + "delete_server_file_after_processing": { + "_input_type": "BoolInput", "advanced": true, - "display_name": "Separator", + "display_name": "Delete Server File After Processing", "dynamic": false, - "info": "", + "info": "If true, the Server File Path will be deleted after processing.", "list": false, "list_add_label": "Add More", - "load_from_db": false, - "name": "sep", + "name": "delete_server_file_after_processing", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "\n" + "type": "bool", + "value": true }, - "template": { - "_input_type": "MultilineInput", + "extract_tables": { + "_input_type": "BoolInput", "advanced": false, - "copy_field": false, - "display_name": "Template", + "display_name": "Extract Tables", "dynamic": false, - "info": "The template to use for formatting the data. It can contain the keys {text}, {data} or any other key in the Data.", + "info": "Extract and format tables from the document", + "list": false, + "list_add_label": "Add More", + "name": "extract_tables", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "file_path": { + "_input_type": "HandleInput", + "advanced": true, + "display_name": "Server File Path", + "dynamic": false, + "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.", "input_types": [ + "Data", "Message" ], - "list": false, + "list": true, "list_add_label": "Add More", - "load_from_db": false, - "multiline": true, - "name": "template", + "name": "file_path", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "str", - "value": "{value}" - } + "type": "other", + "value": "" + }, + "ignore_unspecified_files": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Ignore Unspecified Files", + "dynamic": false, + "info": "If true, Data with no 'file_path' property will be ignored.", + "list": false, + "list_add_label": "Add More", + "name": "ignore_unspecified_files", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "ignore_unsupported_extensions": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Ignore Unsupported Extensions", + "dynamic": false, + "info": "If true, files with unsupported extensions will not be processed.", + "list": false, + "list_add_label": "Add More", + "name": "ignore_unsupported_extensions", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "include_confidence": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Include Confidence Scores", + "dynamic": false, + "info": "Include confidence scores in the extracted text", + "list": false, + "list_add_label": "Add More", + "name": "include_confidence", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "model_type": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model Type", + "dynamic": false, + "external_options": {}, + "info": "Choose the Form Recognizer model to use", + "name": "model_type", + "options": [ + "prebuilt-document", + "prebuilt-read", + "prebuilt-layout" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "prebuilt-document" + }, + "silent_errors": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Silent Errors", + "dynamic": false, + "info": "If true, errors will not raise an exception.", + "list": false, + "list_add_label": "Add More", + "name": "silent_errors", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "url": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "URL", + "dynamic": false, + "info": "URL to the document to process", + "input_types": [ + "str", + "Data", + "Message", + "list" + ], + "list": false, + "list_add_label": "Add More", + "name": "url", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "use_multithreading": { + "_input_type": "BoolInput", + "advanced": false, + "display_name": "Use Concurrent Processing", + "dynamic": false, + "info": "Enable concurrent processing of multiple files", + "list": false, + "list_add_label": "Add More", + "name": "use_multithreading", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + } }, "tool_mode": false }, "showNode": true, - "type": "ParseData" + "type": "AzureDocumentIntelligence" }, - "id": "ParseData-FJc5T", + "id": "AzureDocumentIntelligence-sIbd2", "measured": { - "height": 397, + "height": 365, "width": 320 }, "position": { - "x": -1716.2494158341083, - "y": -1974.3632430266846 + "x": 4401.222432919379, + "y": 108.59058943287623 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "JSONOutput-cOQuo", + "id": "ParserComponent-vGXJE", "node": { "base_classes": [ "Message" ], "beta": false, + "category": "processing", "conditional_paths": [], "custom_fields": {}, - "description": "Display input data as JSON in the Playground.", - "display_name": "JSON Output", - "documentation": "", + "description": "Extracts text using a template.", + "display_name": "Parser", + "documentation": "https://docs.langflow.org/components-processing#parser", "edited": false, "field_order": [ - "data", - "pretty_print" + "input_data", + "mode", + "pattern", + "sep" ], "frozen": false, - "icon": "Braces", + "icon": "braces", + "key": "ParserComponent", "legacy": false, - "lf_version": "1.4.3", + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], @@ -502,10 +798,10 @@ { "allows_loop": false, "cache": true, - "display_name": "JSON", + "display_name": "Parsed Text", "group_outputs": false, - "method": "json_response", - "name": "json", + "method": "parse_combined_text", + "name": "parsed_text", "selected": "Message", "tool_mode": true, "types": [ @@ -515,6 +811,7 @@ } ], "pinned": false, + "score": 0.001, "template": { "_type": "Component", "code": { @@ -533,86 +830,142 @@ "show": true, "title_case": false, "type": "code", - "value": "import json\n\nfrom langflow.base.io.text import TextComponent\nfrom langflow.inputs import DataInput\nfrom langflow.io import BoolInput, Output\nfrom langflow.schema import Data\nfrom langflow.schema.message import Message\n\n\nclass JSONOutputComponent(TextComponent):\n display_name = \"JSON Output\"\n description = \"Display input data as JSON in the Playground.\"\n icon = \"Braces\"\n name = \"JSONOutput\"\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to JSON.\",\n is_list=True,\n ),\n BoolInput(\n name=\"pretty_print\",\n display_name=\"Pretty Print\",\n info=\"Format JSON with proper indentation\",\n value=True,\n advanced=True,\n ),\n ]\n outputs = [\n Output(display_name=\"JSON\", name=\"json\", method=\"json_response\"),\n ]\n\n def _process_data(self, data: Data | list[Data]) -> dict | list:\n \"\"\"Convert Data object(s) to dictionary/list format.\"\"\"\n if isinstance(data, list):\n return [item.dict() for item in data]\n return data.dict()\n\n def json_response(self) -> Message:\n try:\n # Process the Data input\n processed_data = self._process_data(self.data)\n\n # Convert to JSON string with optional pretty printing\n if self.pretty_print:\n formatted_json = json.dumps(\n processed_data, indent=2, ensure_ascii=False\n )\n else:\n formatted_json = json.dumps(processed_data, ensure_ascii=False)\n\n message = Message(text=formatted_json)\n self.status = formatted_json\n return message\n\n except Exception as e:\n error_message = f\"Error processing data to JSON: {e!s}\"\n message = Message(text=error_message)\n self.status = error_message\n return message\n" + "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/components-processing#parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n formatted_text = self.pattern.format(**data.data)\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n" }, - "data": { - "_input_type": "DataInput", + "input_data": { + "_input_type": "HandleInput", "advanced": false, - "display_name": "Data", + "display_name": "Data or DataFrame", "dynamic": false, - "info": "The data to convert to JSON.", + "info": "Accepts either a DataFrame or a Data object.", "input_types": [ + "DataFrame", "Data" ], - "list": true, + "list": false, "list_add_label": "Add More", - "name": "data", + "name": "input_data", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "mode": { + "_input_type": "TabInput", + "advanced": false, + "display_name": "Mode", + "dynamic": false, + "info": "Convert into raw string instead of using a template.", + "name": "mode", + "options": [ + "Parser", + "Stringify" + ], "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_metadata": true, + "type": "tab", + "value": "Parser" + }, + "pattern": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Template", + "dynamic": true, + "info": "Use variables within curly brackets to extract column values for DataFrames or key values for Data.For example: `Name: {Name}, Age: {Age}, Country: {Country}`", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "pattern", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, "trace_as_input": true, "trace_as_metadata": true, - "type": "other", - "value": "" + "type": "str", + "value": "{text}" }, - "pretty_print": { - "_input_type": "BoolInput", + "sep": { + "_input_type": "MessageTextInput", "advanced": true, - "display_name": "Pretty Print", + "display_name": "Separator", "dynamic": false, - "info": "Format JSON with proper indentation", + "info": "String used to separate rows/items.", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", - "name": "pretty_print", + "load_from_db": false, + "name": "sep", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "str", + "value": "\n" } }, "tool_mode": false }, "showNode": true, - "type": "JSONOutput" + "type": "ParserComponent" }, - "id": "JSONOutput-cOQuo", + "dragging": false, + "id": "ParserComponent-vGXJE", "measured": { - "height": 195, + "height": 329, "width": 320 }, "position": { - "x": -1328.8114799884947, - "y": -1915.9673899997772 + "x": 4774.6992870141885, + "y": 124.56619611745914 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "CPTCode-9u6BH", + "id": "AutonomizeModel-s9Ad7", "node": { "base_classes": [ "Data" ], "beta": false, + "category": "models", "conditional_paths": [], "custom_fields": {}, - "description": "Model for CPT Code", - "display_name": "CPT Code", - "documentation": "https://docs.example.com/clinical-llm", - "edited": true, + "description": "Unified interface for Autonomize text-based AI models with dropdown selection", + "display_name": "Autonomize Model", + "documentation": "https://docs.example.com/autonomize-models", + "edited": false, "field_order": [ + "selected_model", "search_query" ], "frozen": false, "icon": "Autonomize", + "key": "AutonomizeModel", + "last_updated": "2025-10-28T19:13:55.650Z", "legacy": false, + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], @@ -620,8 +973,8 @@ { "allows_loop": false, "cache": true, - "display_name": "CPT Code", - "hidden": null, + "display_name": "Model Output", + "group_outputs": false, "method": "build_output", "name": "prediction", "options": null, @@ -635,6 +988,8 @@ } ], "pinned": false, + "priority": 1, + "score": 0.00021002192191843841, "template": { "_type": "Component", "code": { @@ -653,15 +1008,15 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.inputs.input_mixin import FieldTypes\nfrom langflow.io import MultilineInput, Output\nfrom langflow.schema import Data\nfrom pydantic import BaseModel\n\nfrom app.base.modelhub import ATModelComponent\nfrom app.services.modelhub.model_endpoint import ModelEndpoint\n\n\nclass Entity(BaseModel):\n Description: str\n Score: float\n Code: str\n\n\nclass CPTDescription(BaseModel):\n data: list[Entity]\n Text: str\n\n\nclass CPTCodeComponent(ATModelComponent):\n \"\"\"Component for the CPT Code model\"\"\"\n\n display_name: str = \"CPT Code\"\n description: str = \"Model for CPT Code\"\n documentation: str = \"https://docs.example.com/clinical-llm\"\n icon: str = \"Autonomize\"\n name: str = \"CPTCode\"\n _model_name = ModelEndpoint.CPT_CODE\n\n inputs = [\n MultilineInput(\n name=\"search_query\",\n display_name=\"Search query\",\n field_type=FieldTypes.TEXT,\n multiline=True,\n )\n ]\n\n outputs = [\n Output(name=\"prediction\", display_name=\"CPT Code\", method=\"build_output\"),\n ]\n\n async def extract_entities(self, text) -> CPTDescription:\n \"\"\"Extract clinical entities from the input text\"\"\"\n # Handle the case where input is a JSON string\n if isinstance(text, str) and text.strip().startswith('{'):\n try:\n import json\n text_dict = json.loads(text)\n text = text_dict\n except json.JSONDecodeError as e:\n # If JSON parsing fails, use the original text\n pass\n\n # Handle the case where input is a dictionary with result structure\n if isinstance(text, dict) and \"result\" in text:\n result = text[\"result\"]\n if isinstance(result, list) and len(result) > 0:\n # Extract text from the first result item\n first_result = result[0]\n if isinstance(first_result, dict) and \"text\" in first_result:\n text = first_result[\"text\"]\n else:\n raise ValueError(\"Expected 'text' field in result[0]\")\n else:\n raise ValueError(\"Expected non-empty 'result' list\")\n elif isinstance(text, dict) and \"text\" in text:\n # Handle case where text is directly in the dict\n text = text[\"text\"]\n\n try:\n response = await self.predict(text=text)\n return CPTDescription(**response[\"data\"])\n except Exception as e:\n msg = f\"Error extracting clinical entities: {e!s}\"\n raise ValueError(msg) from e\n\n async def build_output(self) -> Data:\n \"\"\"Generate the output based on selected knowledgehub hubs.\"\"\"\n query_results = await self.extract_entities(self.search_query)\n data = Data(value=query_results.data)\n self.status = data\n return data\n" + "value": "\"\"\"Autonomize Model Component - Unified text-based model component with dropdown selection.\"\"\"\n\nimport ast\nimport json\nfrom typing import Any\n\nfrom langflow.services.modelhub.model_endpoint import ModelEndpoint\nfrom loguru import logger\n\nfrom langflow.base.modelhub import ATModelComponent\nfrom langflow.inputs.inputs import FieldTypes\nfrom langflow.io import DropdownInput, MultilineInput, Output\nfrom langflow.schema.data import Data\n\n\nclass AutonomizeModelComponent(ATModelComponent):\n \"\"\"Unified component for Autonomize text-based models with dropdown selection.\"\"\"\n\n display_name: str = \"Autonomize Model\"\n description: str = \"Unified interface for Autonomize text-based AI models with dropdown selection\"\n documentation: str = \"https://docs.example.com/autonomize-models\"\n icon: str = \"Autonomize\"\n name: str = \"AutonomizeModel\"\n category: str = \"models\"\n priority: int = 1 # High priority to appear near top\n\n # Model mapping for dropdown options\n MODEL_OPTIONS = {\n \"Clinical LLM\": ModelEndpoint.CLINICAL_LLM,\n \"Clinical Note Classifier\": ModelEndpoint.CLINICAL_NOTE_CLASSIFIER,\n \"Combined Entity Linking\": ModelEndpoint.COMBINED_ENTITY_LINKING,\n \"CPT Code\": ModelEndpoint.CPT_CODE,\n \"ICD-10 Code\": ModelEndpoint.ICD_10,\n \"RxNorm Code\": ModelEndpoint.RXNORM,\n \"Short Summary\": ModelEndpoint.SHORT_SUMMARY,\n \"Detailed Summary\": ModelEndpoint.DETAILED_SUMMARY,\n \"Page Level Classifier\": ModelEndpoint.PAGE_LEVEL_CLASSIFIER\n }\n\n # Model descriptions for UI\n MODEL_DESCRIPTIONS = {\n \"Clinical LLM\": \"Extract clinical entities from medical text\",\n \"Clinical Note Classifier\": \"Classify clinical notes by type\",\n \"Combined Entity Linking\": \"Link extracted entities to standard vocabularies\",\n \"CPT Code\": \"Extract CPT codes from medical text\",\n \"ICD-10 Code\": \"Extract ICD-10 codes from medical text\",\n \"RxNorm Code\": \"Extract RxNorm codes for medications\",\n \"Short Summary\": \"short summary\",\n \"Detailed Summary\": \"detailed summary\",\n \"Page Level Classifier\": \"page type classification\"\n }\n\n inputs = [\n DropdownInput(\n name=\"selected_model\",\n display_name=\"Model\",\n options=list(MODEL_OPTIONS.keys()),\n value=next(iter(MODEL_OPTIONS.keys())),\n info=\"Select the Autonomize model to use\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"search_query\",\n display_name=\"Text Input\",\n field_type=FieldTypes.TEXT,\n multiline=True,\n tool_mode=True,\n info=\"Input text to process with the selected model\",\n ),\n ]\n\n outputs = [\n Output(\n name=\"prediction\",\n display_name=\"Model Output\",\n method=\"build_output\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._current_model_endpoint = None\n # Initialize _model_name with the default model endpoint (required by ATModelComponent)\n self._model_name = self.MODEL_OPTIONS[next(iter(self.MODEL_OPTIONS.keys()))]\n\n @property\n def model_endpoint(self) -> ModelEndpoint:\n \"\"\"Get the current model endpoint based on selection.\"\"\"\n return self.MODEL_OPTIONS[self.selected_model]\n\n @property\n def model_name_from_endpoint(self) -> str:\n \"\"\"Get the model name from the ModelEndpoint.\"\"\"\n return self.model_endpoint.get_model()\n\n async def extract_entities(self, text: Any) -> dict:\n \"\"\"Extract entities using the selected model.\"\"\"\n # Handle different input formats\n if isinstance(text, str) and text.strip().startswith(\"{\"):\n try:\n text_dict = json.loads(text)\n text = text_dict\n except json.JSONDecodeError as e:\n logger.error(f\"Failed to parse JSON string: {e}\")\n # If JSON parsing fails, use the original text\n\n # Handle the case where input is a dictionary with result structure\n if isinstance(text, dict) and \"result\" in text:\n result = text[\"result\"]\n if isinstance(result, list) and len(result) > 0:\n # Extract text from the first result item\n first_result = result[0]\n if isinstance(first_result, dict) and \"text\" in first_result:\n extracted_text = first_result[\"text\"]\n text = extracted_text\n else:\n msg = \"First result item does not contain 'text' key\"\n raise ValueError(msg)\n else:\n msg = \"Result list is empty or not a list\"\n raise ValueError(msg)\n elif isinstance(text, dict) and \"text\" in text:\n text = text[\"text\"]\n elif hasattr(text, \"text\"):\n text = text.text\n\n try:\n # Use the standard predict method from ATModelComponent\n # Set the _model_name based on current selection\n self._model_name = self.model_endpoint\n\n response = await self.predict(text=text)\n\n # Handle string responses\n if isinstance(response, str):\n try:\n response = ast.literal_eval(response)\n except (ValueError, SyntaxError):\n # If it's not a valid Python literal, try JSON\n try:\n response = json.loads(response)\n except json.JSONDecodeError:\n # If neither works, wrap in a dict\n response = {\"result\": response}\n else:\n return response\n except Exception as e:\n msg = f\"Error processing with {self.model_name}: {e!s}\"\n logger.error(f\"API call failed: {msg}\")\n raise ValueError(msg) from e\n\n async def build_output(self) -> Data:\n \"\"\"Generate the output based on selected model.\"\"\"\n query_results = await self.extract_entities(self.search_query)\n\n # Create standardized output format\n output_data = {\n \"model\": self.selected_model,\n \"model_description\": self.MODEL_DESCRIPTIONS.get(self.selected_model, \"\"),\n \"data\": query_results\n }\n\n data = Data(value=output_data)\n self.status = f\"Processed with {self.selected_model}\"\n return data\n\n def build(self):\n \"\"\"Return the main build function for Langflow framework.\"\"\"\n return self.build_output\n" }, "search_query": { "_input_type": "MultilineInput", "advanced": false, "copy_field": false, - "display_name": "Search query", + "display_name": "Text Input", "dynamic": false, - "info": "", + "info": "Input text to process with the selected model", "input_types": [ "Message" ], @@ -674,77 +1029,109 @@ "required": false, "show": true, "title_case": false, - "tool_mode": false, + "tool_mode": true, "trace_as_input": true, "trace_as_metadata": true, "type": "str", "value": "" + }, + "selected_model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model", + "dynamic": false, + "external_options": {}, + "info": "Select the Autonomize model to use", + "name": "selected_model", + "options": [ + "Clinical LLM", + "Clinical Note Classifier", + "Combined Entity Linking", + "CPT Code", + "ICD-10 Code", + "RxNorm Code", + "Short Summary", + "Detailed Summary", + "Page Level Classifier" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "CPT Code" } }, "tool_mode": false }, "showNode": true, - "type": "CPTCode" + "type": "AutonomizeModel" }, - "id": "CPTCode-9u6BH", + "dragging": false, + "id": "AutonomizeModel-s9Ad7", "measured": { - "height": 233, + "height": 302, "width": 320 }, "position": { - "x": -2104.5837757988065, - "y": -2009.3920071806008 + "x": 5132.840919131094, + "y": 140.38024221093286 }, - "selected": true, + "selected": false, "type": "genericNode" }, { "data": { - "id": "BlobStorage-x6Wd4", + "id": "ChatOutput-l6Kws", "node": { "base_classes": [ - "Data" + "Message" ], "beta": false, - "category": "inputs", "conditional_paths": [], "custom_fields": {}, - "description": "Load files from Azure Blob Storage", - "display_name": "Blob Storage", - "documentation": "http://docs.langflow.org/components/storage", + "description": "Display a chat message in the Playground.", + "display_name": "Chat Output", + "documentation": "https://docs.langflow.org/components-io#chat-output", "edited": false, "field_order": [ - "storage_account", - "container_name", - "file_name", - "return_all_files" + "input_value", + "should_store_message", + "sender", + "sender_name", + "session_id", + "data_template" ], "frozen": false, - "icon": "Autonomize", - "key": "BlobStorage", + "icon": "MessagesSquare", "legacy": false, - "lf_version": "1.4.3", "metadata": {}, - "minimized": false, + "minimized": true, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "File Path", + "display_name": "Output Message", "group_outputs": false, - "method": "get_file_paths", - "name": "file_path", - "selected": "Data", + "method": "message_response", + "name": "message", + "selected": "Message", "tool_mode": true, "types": [ - "Data" + "Message" ], "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.007568328950209746, "template": { "_type": "Component", "code": { @@ -763,141 +1150,188 @@ "show": true, "title_case": false, "type": "code", - "value": "\"\"\"Blob Storage Component for loading files from Azure Blob Storage.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.services.deps import get_flexstore_service\nfrom loguru import logger\n\n\nclass BlobStorageComponent(Component):\n display_name = \"Blob Storage\"\n category: str = \"input_output\"\n description = \"Load files from Azure Blob Storage\"\n documentation = \"http://docs.langflow.org/components/storage\"\n icon = \"Autonomize\"\n name = \"BlobStorage\"\n\n # Match the property name expected by FileComponent\n FILE_PATH_FIELD = \"file_path\"\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._container_list: list[str] = []\n self._file_list: list[str] = []\n\n inputs = [\n StrInput(\n name=\"storage_account\",\n display_name=\"Storage Account\",\n required=False,\n info=\"Storage Account name\",\n advanced=True,\n ),\n DropdownInput(\n name=\"container_name\",\n display_name=\"Container\",\n info=\"Select a container from the storage account\",\n required=True,\n refresh_button=True,\n ),\n DropdownInput(\n name=\"file_name\",\n display_name=\"File\",\n info=\"Select a file from the container\",\n required=True,\n refresh_button=True,\n ),\n BoolInput(\n name=\"return_all_files\",\n display_name=\"Return All Files\",\n info=\"If true and no specific file is selected, returns all files in the container\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"file_path\", # Match the property name expected by FileComponent\n display_name=\"File Path\",\n method=\"get_file_paths\",\n ),\n ]\n\n async def update_build_config(\n self, build_config: dict, field_value: Any, field_name: str | None = None\n ):\n \"\"\"Update the build configuration based on field changes.\"\"\"\n logger.info(f\"update_build_config called with field_name: {field_name}\")\n\n storage_account = getattr(self, \"storage_account\", None)\n container_name = getattr(self, \"container_name\", None)\n\n if field_name == \"container_name\":\n try:\n # Load the container options when the field is refreshed\n service = get_flexstore_service()\n self._container_list = await service.get_containers(storage_account)\n\n build_config[\"container_name\"][\"options\"] = self._container_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating container list: {e!s}\")\n raise\n\n elif field_name == \"file_name\" and container_name:\n try:\n # Load the file options when the field is refreshed\n service = get_flexstore_service()\n self._file_list = await service.get_files(\n storage_account, container_name\n )\n\n build_config[\"file_name\"][\"options\"] = self._file_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating file list: {e!s}\")\n raise\n\n return build_config\n\n async def get_file_paths(self) -> list[Data]:\n \"\"\"Get file paths for the FileComponent to process.\"\"\"\n try:\n if not self.container_name:\n logger.warning(\"Container name is required.\")\n return []\n\n service = get_flexstore_service()\n file_paths = []\n\n # If a specific file is selected\n if self.file_name:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, self.file_name\n )\n if signed_url:\n file_paths = [Data(data={self.FILE_PATH_FIELD: signed_url})]\n # If no specific file is selected and return_all_files is True\n elif self.return_all_files:\n files = await service.get_files(\n self.storage_account, self.container_name\n )\n for file in files:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, file\n )\n if signed_url:\n file_paths.append(Data(data={self.FILE_PATH_FIELD: signed_url}))\n\n if file_paths:\n self.status = file_paths\n logger.info(f\"Generated {len(file_paths)} file paths\")\n for path in file_paths:\n logger.debug(f\"File path: {path.data.get(self.FILE_PATH_FIELD)}\")\n else:\n logger.warning(\"No file paths generated\")\n\n return file_paths\n\n except Exception as e:\n logger.error(f\"Error in get_file_paths: {e!s}\")\n return []\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom langflow.base.io.chat import ChatComponent\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.schema.properties import Source\nfrom langflow.template.field.base import Output\nfrom langflow.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, icon, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, - "container_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Container", + "data_template": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Data Template", "dynamic": false, - "info": "Select a container from the storage account", - "name": "container_name", - "options": [], - "options_metadata": [], + "info": "Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "data_template", "placeholder": "", - "refresh_button": true, - "required": true, + "required": false, "show": true, "title_case": false, - "toggle": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "genesis-container" + "value": "{text}" }, - "file_name": { - "_input_type": "DropdownInput", + "input_value": { + "_input_type": "HandleInput", "advanced": false, + "display_name": "Inputs", + "dynamic": false, + "info": "Message to be passed as output.", + "input_types": [ + "Data", + "DataFrame", + "Message" + ], + "list": false, + "list_add_label": "Add More", + "name": "input_value", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "sender": { + "_input_type": "DropdownInput", + "advanced": true, "combobox": false, "dialog_inputs": {}, - "display_name": "File", + "display_name": "Sender Type", "dynamic": false, - "info": "Select a file from the container", - "name": "file_name", - "options": [], + "external_options": {}, + "info": "Type of sender.", + "name": "sender", + "options": [ + "Machine", + "User" + ], "options_metadata": [], "placeholder": "", - "refresh_button": true, - "required": true, + "required": false, "show": true, "title_case": false, "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "0108_Subject199.pdf" + "value": "Machine" }, - "return_all_files": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Return All Files", + "sender_name": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Sender Name", "dynamic": false, - "info": "If true and no specific file is selected, returns all files in the container", + "info": "Name of the sender.", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", - "name": "return_all_files", + "load_from_db": false, + "name": "sender_name", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "str", + "value": "AI" }, - "storage_account": { - "_input_type": "StrInput", + "session_id": { + "_input_type": "MessageTextInput", "advanced": true, - "display_name": "Storage Account", + "display_name": "Session ID", "dynamic": false, - "info": "Storage Account name", + "info": "The session ID of the chat. If empty, the current session ID parameter will be used.", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "storage_account", + "name": "session_id", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, "type": "str", "value": "" + }, + "should_store_message": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Store Messages", + "dynamic": false, + "info": "Store the message in the history.", + "list": false, + "list_add_label": "Add More", + "name": "should_store_message", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true } }, "tool_mode": false }, - "showNode": true, - "type": "BlobStorage" + "selected_output": "message", + "showNode": false, + "type": "ChatOutput" }, "dragging": false, - "id": "BlobStorage-x6Wd4", + "id": "ChatOutput-l6Kws", "measured": { - "height": 365, - "width": 320 + "height": 48, + "width": 192 }, "position": { - "x": -3436.5858201807296, - "y": -2023.7433132972817 + "x": 6007.8127721858755, + "y": 433.75370746219096 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "azure_ocr-kDLMl", + "id": "ParserComponent-guFKQ", "node": { "base_classes": [ - "Data" + "Message" ], "beta": false, - "category": "models", + "category": "processing", "conditional_paths": [], "custom_fields": {}, - "description": "Process documents using Azure Form Recognizer OCR capabilities", - "display_name": "Form Recognizer", - "documentation": "", + "description": "Extracts text using a template.", + "display_name": "Parser", + "documentation": "https://docs.langflow.org/components-processing#parser", "edited": false, "field_order": [ - "url", - "file_path", - "silent_errors", - "delete_server_file_after_processing", - "ignore_unsupported_extensions", - "ignore_unspecified_files", - "model_type", - "extract_tables", - "include_confidence", - "use_multithreading", - "concurrency_multithreading" + "input_data", + "mode", + "pattern", + "sep" ], "frozen": false, - "icon": "Azure", - "key": "azure_ocr", + "icon": "braces", + "key": "ParserComponent", "legacy": false, - "lf_version": "1.4.3", + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], @@ -905,19 +1339,20 @@ { "allows_loop": false, "cache": true, - "display_name": "Structured Data", - "method": "load_files", - "name": "structured_data", - "selected": "Data", + "display_name": "Parsed Text", + "group_outputs": false, + "method": "parse_combined_text", + "name": "parsed_text", + "selected": "Message", "tool_mode": true, "types": [ - "Data" + "Message" ], "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.007568328950209746, + "score": 0.001, "template": { "_type": "Component", "code": { @@ -936,252 +1371,130 @@ "show": true, "title_case": false, "type": "code", - "value": "\"\"\"Form Recognizer Component for processing and analyzing form data.\"\"\"\n\nimport asyncio\nimport concurrent.futures\nimport mimetypes\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom urllib.parse import unquote, urlparse\n\nimport aiohttp\nimport requests\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output\nfrom langflow.schema import Data\nfrom loguru import logger\n\n\nclass FormRecognizerComponent(BaseFileComponent):\n \"\"\"Component for recognizing and processing form data.\"\"\"\n\n display_name = \"Form Recognizer\"\n description = \"Process documents using Azure Form Recognizer OCR capabilities\"\n icon = \"Azure\"\n name = \"azure_ocr\"\n # legacy = True\n\n VALID_EXTENSIONS = [\"pdf\", \"jpg\", \"jpeg\", \"png\", \"bmp\", \"tiff\", \"tif\"]\n\n inputs = [\n HandleInput(\n name=\"url\",\n display_name=\"URL\",\n info=\"URL to the document to process\",\n input_types=[\"str\", \"Data\", \"Message\", \"list\"],\n required=False,\n ),\n # Include only the HandleInput and BoolInputs from base_inputs\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n DropdownInput(\n name=\"model_type\",\n display_name=\"Model Type\",\n options=[\"prebuilt-document\", \"prebuilt-read\", \"prebuilt-layout\"],\n value=\"prebuilt-document\",\n info=\"Choose the Form Recognizer model to use\",\n ),\n BoolInput(\n name=\"extract_tables\",\n display_name=\"Extract Tables\",\n value=True,\n info=\"Extract and format tables from the document\",\n ),\n BoolInput(\n name=\"include_confidence\",\n display_name=\"Include Confidence Scores\",\n value=False,\n advanced=True,\n info=\"Include confidence scores in the extracted text\",\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"Use Concurrent Processing\",\n value=True,\n info=\"Enable concurrent processing of multiple files\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"Number of files to process concurrently\",\n value=2,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Structured Data\", name=\"structured_data\", method=\"load_files\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n self._text_content = \"\"\n\n\n def get_text_content(self) -> str:\n \"\"\"Return the concatenated text content from all processed pages.\"\"\"\n return self._text_content\n\n def _extract_filename_from_url(self, url: str) -> str:\n \"\"\"Extract filename from URL or generate a default one.\"\"\"\n try:\n logger.debug(f\"Extracting filename from URL: {url}\")\n parsed_url = urlparse(url)\n path = unquote(parsed_url.path)\n filename = os.path.basename(path)\n\n if filename and \".\" in filename:\n logger.debug(f\"Found filename in URL path: {filename}\")\n return filename\n\n response = requests.head(url, allow_redirects=True)\n if \"content-disposition\" in response.headers:\n content_disp = response.headers[\"content-disposition\"]\n if \"filename=\" in content_disp:\n filename = content_disp.split(\"filename=\")[1].strip(\"\\\"'\")\n logger.debug(f\"Found filename in content-disposition: {filename}\")\n return filename\n\n if \"content-type\" in response.headers:\n ext = mimetypes.guess_extension(response.headers[\"content-type\"])\n if ext:\n filename = f\"downloaded{ext}\"\n logger.debug(f\"Generated filename from content-type: {filename}\")\n return filename\n\n logger.debug(\"Using default filename: downloaded.pdf\")\n return \"downloaded.pdf\"\n except Exception as e:\n logger.error(f\"Error extracting filename from URL: {e!s}\")\n return \"downloaded.pdf\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n logger.debug(f\"Attempting to download file from URL: {url}\")\n filename = self._extract_filename_from_url(url)\n local_path = os.path.join(self.temp_dir, filename)\n logger.debug(f\"Local path for download: {local_path}\")\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n def _extract_url_from_input(self, input_data) -> str | None:\n \"\"\"Extract URL string from various input types.\"\"\"\n logger.debug(f\"Extracting URL from input data type: {type(input_data)}\")\n\n # Handle list of Data objects (from blob storage)\n if isinstance(input_data, list):\n logger.debug(f\"Processing list input with {len(input_data)} items\")\n if input_data and isinstance(input_data[0], Data):\n url = input_data[0].data.get(\"file_path\")\n logger.debug(f\"Extracted URL from first Data object in list: {url}\")\n return url\n return None\n\n if isinstance(input_data, str):\n logger.debug(f\"Input is string: {input_data}\")\n return input_data\n elif isinstance(input_data, Data):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from Data object: {url}\")\n return url\n elif hasattr(input_data, \"text\"):\n logger.debug(f\"Extracted URL from text attribute: {input_data.text}\")\n return input_data.text\n elif hasattr(input_data, \"data\"):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from data attribute: {url}\")\n return url\n logger.debug(\"No URL found in input data\")\n return None\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths.\"\"\"\n resolved_files = []\n logger.debug(\"Starting path validation and resolution\")\n\n # Handle URL input if provided\n if hasattr(self, \"url\") and self.url:\n try:\n logger.debug(f\"Processing URL input: {self.url}\")\n # Extract URL from different input types\n url = self._extract_url_from_input(self.url)\n if not url:\n logger.warning(\"No valid URL found in input\")\n return resolved_files\n\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(url)\n )\n finally:\n loop.close()\n\n if local_path:\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": url,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n except Exception as e:\n logger.error(f\"Error processing URL {url}: {e!s}\")\n if not self.silent_errors:\n raise\n\n # Handle file_path input\n file_path = self._file_path_as_list()\n logger.debug(f\"Processing file_path input: {file_path}\")\n for obj in file_path:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n logger.debug(f\"Processing server file path: {server_file_path}\")\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Check if it's a URL\n if isinstance(server_file_path, str) and server_file_path.startswith(\n (\"http://\", \"https://\")\n ):\n logger.debug(f\"Processing URL from file_path: {server_file_path}\")\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(server_file_path)\n )\n finally:\n loop.close()\n\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": server_file_path,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(server_file_path)))\n logger.debug(f\"Resolved local file path: {resolved_path}\")\n if not resolved_path.exists():\n msg = f\"File not found: {server_file_path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {server_file_path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n logger.debug(f\"Resolved {len(resolved_files)} files\")\n return resolved_files\n\n async def process_file(\n self, file_path: str, *, silent_errors: bool = False\n ) -> tuple[Data, str]:\n \"\"\"Process a single file using the OCR service.\"\"\"\n try:\n from langflow.services.manager import service_manager\n\n ocr_service = service_manager.get(\"ocr_service\")\n\n with open(file_path, \"rb\") as file:\n file_content = file.read()\n\n extracted_content, plain_text = await ocr_service.process_document(\n file_content=file_content,\n model_type=self.model_type,\n include_confidence=self.include_confidence,\n extract_tables=self.extract_tables,\n )\n\n structured_data = Data(\n text=plain_text,\n data={\n self.SERVER_FILE_PATH_FIELDNAME: str(file_path),\n \"result\": extracted_content,\n },\n )\n\n return structured_data, plain_text\n\n except Exception as e:\n logger.error(f\"Error processing file {file_path}: {e!s}\")\n if not silent_errors:\n raise\n return None, \"\"\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process multiple files with concurrent processing.\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n concurrency = (\n 1\n if not self.use_multithreading\n else max(1, self.concurrency_multithreading)\n )\n file_count = len(file_list)\n\n logger.info(f\"Processing {file_count} files with concurrency: {concurrency}\")\n\n all_plain_text = []\n processed_data = []\n\n if concurrency > 1 and file_count > 1:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n with concurrent.futures.ThreadPoolExecutor(\n max_workers=concurrency\n ) as executor:\n future_to_file = {\n executor.submit(\n lambda path: loop.run_until_complete(\n self.process_file(\n str(path), silent_errors=self.silent_errors\n )\n ),\n file.path,\n ): file\n for file in file_list\n }\n for future in concurrent.futures.as_completed(future_to_file):\n try:\n structured_data, plain_text = future.result()\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error in concurrent processing: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n else:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n for file in file_list:\n try:\n structured_data, plain_text = loop.run_until_complete(\n self.process_file(\n str(file.path), silent_errors=self.silent_errors\n )\n )\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n\n # Store concatenated text content\n self._text_content = \"\\n\\n=== NEW DOCUMENT ===\\n\\n\".join(all_plain_text)\n\n return self.rollup_data(file_list, processed_data)\n\n def __del__(self):\n \"\"\"Cleanup temporary files and directory.\"\"\"\n try:\n if hasattr(self, \"temp_dir\") and os.path.exists(self.temp_dir):\n # Remove downloaded files\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.unlink(file_path)\n # Remove the temporary directory\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" - }, - "concurrency_multithreading": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Processing Concurrency", - "dynamic": false, - "info": "Number of files to process concurrently", - "list": false, - "list_add_label": "Add More", - "name": "concurrency_multithreading", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 2 + "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/components-processing#parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n formatted_text = self.pattern.format(**data.data)\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n" }, - "delete_server_file_after_processing": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Delete Server File After Processing", - "dynamic": false, - "info": "If true, the Server File Path will be deleted after processing.", - "list": false, - "list_add_label": "Add More", - "name": "delete_server_file_after_processing", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "extract_tables": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Extract Tables", - "dynamic": false, - "info": "Extract and format tables from the document", - "list": false, - "list_add_label": "Add More", - "name": "extract_tables", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "file_path": { + "input_data": { "_input_type": "HandleInput", - "advanced": true, - "display_name": "Server File Path", + "advanced": false, + "display_name": "Data or DataFrame", "dynamic": false, - "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.", + "info": "Accepts either a DataFrame or a Data object.", "input_types": [ - "Data", - "Message" + "DataFrame", + "Data" ], - "list": true, + "list": false, "list_add_label": "Add More", - "name": "file_path", + "name": "input_data", "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, "trace_as_metadata": true, "type": "other", "value": "" }, - "ignore_unspecified_files": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Ignore Unspecified Files", - "dynamic": false, - "info": "If true, Data with no 'file_path' property will be ignored.", - "list": false, - "list_add_label": "Add More", - "name": "ignore_unspecified_files", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "ignore_unsupported_extensions": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Ignore Unsupported Extensions", - "dynamic": false, - "info": "If true, files with unsupported extensions will not be processed.", - "list": false, - "list_add_label": "Add More", - "name": "ignore_unsupported_extensions", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "include_confidence": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Include Confidence Scores", - "dynamic": false, - "info": "Include confidence scores in the extracted text", - "list": false, - "list_add_label": "Add More", - "name": "include_confidence", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "model_type": { - "_input_type": "DropdownInput", + "mode": { + "_input_type": "TabInput", "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Model Type", + "display_name": "Mode", "dynamic": false, - "info": "Choose the Form Recognizer model to use", - "name": "model_type", + "info": "Convert into raw string instead of using a template.", + "name": "mode", "options": [ - "prebuilt-document", - "prebuilt-read", - "prebuilt-layout" + "Parser", + "Stringify" ], - "options_metadata": [], "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, - "toggle": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "prebuilt-document" + "type": "tab", + "value": "Parser" }, - "silent_errors": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Silent Errors", - "dynamic": false, - "info": "If true, errors will not raise an exception.", - "list": false, - "list_add_label": "Add More", - "name": "silent_errors", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "url": { - "_input_type": "HandleInput", + "pattern": { + "_input_type": "MultilineInput", "advanced": false, - "display_name": "URL", - "dynamic": false, - "info": "URL to the document to process", + "copy_field": false, + "display_name": "Template", + "dynamic": true, + "info": "Use variables within curly brackets to extract column values for DataFrames or key values for Data.For example: `Name: {Name}, Age: {Age}, Country: {Country}`", "input_types": [ - "str", - "Data", - "Message", - "list" + "Message" ], "list": false, "list_add_label": "Add More", - "name": "url", + "load_from_db": false, + "multiline": true, + "name": "pattern", "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, + "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "other", - "value": "" + "type": "str", + "value": "{value}" }, - "use_multithreading": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Use Concurrent Processing", + "sep": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Separator", "dynamic": false, - "info": "Enable concurrent processing of multiple files", + "info": "String used to separate rows/items.", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", - "name": "use_multithreading", + "load_from_db": false, + "name": "sep", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "str", + "value": "\n" } }, "tool_mode": false }, "showNode": true, - "type": "azure_ocr" + "type": "ParserComponent" }, "dragging": false, - "id": "azure_ocr-kDLMl", + "id": "ParserComponent-guFKQ", "measured": { - "height": 393, + "height": 329, "width": 320 }, "position": { - "x": -2904.0228579728882, - "y": -1999.068581457536 + "x": 5539.478176138232, + "y": 168.72312218089394 }, "selected": false, "type": "genericNode" } ], "viewport": { - "x": 1700.7999158936611, - "y": 1557.4478342145535, - "zoom": 0.5358602617405928 + "x": -3041.448953943585, + "y": 321.5009249214689, + "zoom": 0.6324124198345966 } }, - "description": "\nThe CPT Code Lookup Agent provides a description of a medical procedure based on a given CPT code, aiding in billing and clinical reference.\n\n\n", + "description": "Language Models, Unleashed.", "endpoint_name": null, + "id": "d44b4ea9-c3fe-4676-bcaa-682d78634d54", "is_component": false, - "last_tested_version": "1.4.3", - "name": "CPT Code Agent", + "last_tested_version": "1.6.3", + "name": "CPT Code Extractor", "tags": [ "chart-review" ] diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Clinical Entity Extraction.json b/src/backend/base/langflow/initial_setup/starter_projects/Clinical Entity Extraction.json index 6ca8e5e91b9c..87e35f6f8b55 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Clinical Entity Extraction.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Clinical Entity Extraction.json @@ -6,299 +6,163 @@ "className": "", "data": { "sourceHandle": { - "dataType": "BlobStorage", - "id": "BlobStorage-Wzy6T", - "name": "file_path", + "dataType": "ParserComponent", + "id": "ParserComponent-VotrW", + "name": "parsed_text", "output_types": [ - "Data" + "Message" ] }, "targetHandle": { - "fieldName": "url", - "id": "azure_ocr-EZE9i", + "fieldName": "input_value", + "id": "Agent-CAhED", "inputTypes": [ - "str", - "Data", - "Message", - "list" + "Message" ], - "type": "other" + "type": "str" } }, - "id": "xy-edge__BlobStorage-Wzy6T{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-Wzy6Tœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}-azure_ocr-EZE9i{œfieldNameœ:œurlœ,œidœ:œazure_ocr-EZE9iœ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}", + "id": "xy-edge__ParserComponent-VotrW{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-VotrWœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Agent-CAhED{œfieldNameœ:œinput_valueœ,œidœ:œAgent-CAhEDœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "BlobStorage-Wzy6T", - "sourceHandle": "{œdataTypeœ: œBlobStorageœ, œidœ: œBlobStorage-Wzy6Tœ, œnameœ: œfile_pathœ, œoutput_typesœ: [œDataœ]}", - "target": "azure_ocr-EZE9i", - "targetHandle": "{œfieldNameœ: œurlœ, œidœ: œazure_ocr-EZE9iœ, œinputTypesœ: [œstrœ, œDataœ, œMessageœ, œlistœ], œtypeœ: œotherœ}" + "source": "ParserComponent-VotrW", + "sourceHandle": "{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-VotrWœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "target": "Agent-CAhED", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œAgent-CAhEDœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "azure_ocr", - "id": "azure_ocr-EZE9i", - "name": "structured_data", + "dataType": "AutonomizeModel", + "id": "AutonomizeModel-Tr1Ab", + "name": "component_as_tool", "output_types": [ - "Data" + "Tool" ] }, "targetHandle": { - "fieldName": "data", - "id": "ParseData-ABZoG", + "fieldName": "tools", + "id": "Agent-CAhED", "inputTypes": [ - "Data" + "Tool" ], "type": "other" } }, - "id": "xy-edge__azure_ocr-EZE9i{œdataTypeœ:œazure_ocrœ,œidœ:œazure_ocr-EZE9iœ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataœ]}-ParseData-ABZoG{œfieldNameœ:œdataœ,œidœ:œParseData-ABZoGœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "id": "xy-edge__AutonomizeModel-Tr1Ab{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-Tr1Abœ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}-Agent-CAhED{œfieldNameœ:œtoolsœ,œidœ:œAgent-CAhEDœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}", "selected": false, - "source": "azure_ocr-EZE9i", - "sourceHandle": "{œdataTypeœ: œazure_ocrœ, œidœ: œazure_ocr-EZE9iœ, œnameœ: œstructured_dataœ, œoutput_typesœ: [œDataœ]}", - "target": "ParseData-ABZoG", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-ABZoGœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "source": "AutonomizeModel-Tr1Ab", + "sourceHandle": "{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-Tr1Abœ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}", + "target": "Agent-CAhED", + "targetHandle": "{œfieldNameœ:œtoolsœ,œidœ:œAgent-CAhEDœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "ParseData", - "id": "ParseData-ABZoG", - "name": "text", + "dataType": "Agent", + "id": "Agent-CAhED", + "name": "response", "output_types": [ "Message" ] }, "targetHandle": { - "fieldName": "search_query", - "id": "ClinicalLLM-fOoUf", + "fieldName": "input_value", + "id": "ChatOutput-DEzJg", "inputTypes": [ + "Data", + "DataFrame", "Message" ], - "type": "str" + "type": "other" } }, - "id": "xy-edge__ParseData-ABZoG{œdataTypeœ:œParseDataœ,œidœ:œParseData-ABZoGœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-ClinicalLLM-fOoUf{œfieldNameœ:œsearch_queryœ,œidœ:œClinicalLLM-fOoUfœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "xy-edge__Agent-CAhED{œdataTypeœ:œAgentœ,œidœ:œAgent-CAhEDœ,œnameœ:œresponseœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-DEzJg{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-DEzJgœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", "selected": false, - "source": "ParseData-ABZoG", - "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-ABZoGœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", - "target": "ClinicalLLM-fOoUf", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œClinicalLLM-fOoUfœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "Agent-CAhED", + "sourceHandle": "{œdataTypeœ:œAgentœ,œidœ:œAgent-CAhEDœ,œnameœ:œresponseœ,œoutput_typesœ:[œMessageœ]}", + "target": "ChatOutput-DEzJg", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-DEzJgœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "ClinicalLLM", - "id": "ClinicalLLM-fOoUf", - "name": "prediction", + "dataType": "AzureDocumentIntelligence", + "id": "AzureDocumentIntelligence-cekA1", + "name": "structured_data", "output_types": [ - "Data" + "DataFrame" ] }, "targetHandle": { - "fieldName": "data", - "id": "JSONOutput-b6rNP", + "fieldName": "input_data", + "id": "ParserComponent-VotrW", "inputTypes": [ + "DataFrame", "Data" ], "type": "other" } }, - "id": "xy-edge__ClinicalLLM-fOoUf{œdataTypeœ:œClinicalLLMœ,œidœ:œClinicalLLM-fOoUfœ,œnameœ:œpredictionœ,œoutput_typesœ:[œDataœ]}-JSONOutput-b6rNP{œfieldNameœ:œdataœ,œidœ:œJSONOutput-b6rNPœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "id": "xy-edge__AzureDocumentIntelligence-cekA1{œdataTypeœ:œAzureDocumentIntelligenceœ,œidœ:œAzureDocumentIntelligence-cekA1œ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataFrameœ]}-ParserComponent-VotrW{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-VotrWœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "selected": false, - "source": "ClinicalLLM-fOoUf", - "sourceHandle": "{œdataTypeœ: œClinicalLLMœ, œidœ: œClinicalLLM-fOoUfœ, œnameœ: œpredictionœ, œoutput_typesœ: [œDataœ]}", - "target": "JSONOutput-b6rNP", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œJSONOutput-b6rNPœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" - } - ], - "nodes": [ + "source": "AzureDocumentIntelligence-cekA1", + "sourceHandle": "{œdataTypeœ:œAzureDocumentIntelligenceœ,œidœ:œAzureDocumentIntelligence-cekA1œ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataFrameœ]}", + "target": "ParserComponent-VotrW", + "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-VotrWœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" + }, { + "animated": false, + "className": "", "data": { - "id": "BlobStorage-Wzy6T", - "node": { - "base_classes": [ + "sourceHandle": { + "dataType": "BlobStorage", + "id": "BlobStorage-sNo3r", + "name": "file_path", + "output_types": [ "Data" - ], - "beta": false, - "category": "inputs", - "conditional_paths": [], - "custom_fields": {}, - "description": "Load files from Azure Blob Storage", - "display_name": "Blob Storage", - "documentation": "http://docs.langflow.org/components/storage", - "edited": false, - "field_order": [ - "storage_account", - "container_name", - "file_name", - "return_all_files" - ], - "frozen": false, - "icon": "Autonomize", - "key": "BlobStorage", - "legacy": false, - "lf_version": "1.4.3", - "metadata": {}, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "File Path", - "group_outputs": false, - "method": "get_file_paths", - "name": "file_path", - "selected": "Data", - "tool_mode": true, - "types": [ - "Data" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "score": 0.007568328950209746, - "template": { - "_type": "Component", - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "\"\"\"Blob Storage Component for loading files from Azure Blob Storage.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.services.deps import get_flexstore_service\nfrom loguru import logger\n\n\nclass BlobStorageComponent(Component):\n display_name = \"Blob Storage\"\n category: str = \"input_output\"\n description = \"Load files from Azure Blob Storage\"\n documentation = \"http://docs.langflow.org/components/storage\"\n icon = \"Autonomize\"\n name = \"BlobStorage\"\n\n # Match the property name expected by FileComponent\n FILE_PATH_FIELD = \"file_path\"\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._container_list: list[str] = []\n self._file_list: list[str] = []\n\n inputs = [\n StrInput(\n name=\"storage_account\",\n display_name=\"Storage Account\",\n required=False,\n info=\"Storage Account name\",\n advanced=True,\n ),\n DropdownInput(\n name=\"container_name\",\n display_name=\"Container\",\n info=\"Select a container from the storage account\",\n required=True,\n refresh_button=True,\n ),\n DropdownInput(\n name=\"file_name\",\n display_name=\"File\",\n info=\"Select a file from the container\",\n required=True,\n refresh_button=True,\n ),\n BoolInput(\n name=\"return_all_files\",\n display_name=\"Return All Files\",\n info=\"If true and no specific file is selected, returns all files in the container\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"file_path\", # Match the property name expected by FileComponent\n display_name=\"File Path\",\n method=\"get_file_paths\",\n ),\n ]\n\n async def update_build_config(\n self, build_config: dict, field_value: Any, field_name: str | None = None\n ):\n \"\"\"Update the build configuration based on field changes.\"\"\"\n logger.info(f\"update_build_config called with field_name: {field_name}\")\n\n storage_account = getattr(self, \"storage_account\", None)\n container_name = getattr(self, \"container_name\", None)\n\n if field_name == \"container_name\":\n try:\n # Load the container options when the field is refreshed\n service = get_flexstore_service()\n self._container_list = await service.get_containers(storage_account)\n\n build_config[\"container_name\"][\"options\"] = self._container_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating container list: {e!s}\")\n raise\n\n elif field_name == \"file_name\" and container_name:\n try:\n # Load the file options when the field is refreshed\n service = get_flexstore_service()\n self._file_list = await service.get_files(\n storage_account, container_name\n )\n\n build_config[\"file_name\"][\"options\"] = self._file_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating file list: {e!s}\")\n raise\n\n return build_config\n\n async def get_file_paths(self) -> list[Data]:\n \"\"\"Get file paths for the FileComponent to process.\"\"\"\n try:\n if not self.container_name:\n logger.warning(\"Container name is required.\")\n return []\n\n service = get_flexstore_service()\n file_paths = []\n\n # If a specific file is selected\n if self.file_name:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, self.file_name\n )\n if signed_url:\n file_paths = [Data(data={self.FILE_PATH_FIELD: signed_url})]\n # If no specific file is selected and return_all_files is True\n elif self.return_all_files:\n files = await service.get_files(\n self.storage_account, self.container_name\n )\n for file in files:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, file\n )\n if signed_url:\n file_paths.append(Data(data={self.FILE_PATH_FIELD: signed_url}))\n\n if file_paths:\n self.status = file_paths\n logger.info(f\"Generated {len(file_paths)} file paths\")\n for path in file_paths:\n logger.debug(f\"File path: {path.data.get(self.FILE_PATH_FIELD)}\")\n else:\n logger.warning(\"No file paths generated\")\n\n return file_paths\n\n except Exception as e:\n logger.error(f\"Error in get_file_paths: {e!s}\")\n return []\n" - }, - "container_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Container", - "dynamic": false, - "info": "Select a container from the storage account", - "name": "container_name", - "options": [], - "options_metadata": [], - "placeholder": "", - "refresh_button": true, - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "genesis-container" - }, - "file_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "File", - "dynamic": false, - "info": "Select a file from the container", - "name": "file_name", - "options": [], - "options_metadata": [], - "placeholder": "", - "refresh_button": true, - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "0108_Subject199.pdf" - }, - "return_all_files": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Return All Files", - "dynamic": false, - "info": "If true and no specific file is selected, returns all files in the container", - "list": false, - "list_add_label": "Add More", - "name": "return_all_files", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "storage_account": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Storage Account", - "dynamic": false, - "info": "Storage Account name", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "storage_account", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - } - }, - "tool_mode": false + ] }, - "showNode": true, - "type": "BlobStorage" - }, - "dragging": false, - "id": "BlobStorage-Wzy6T", - "measured": { - "height": 365, - "width": 320 - }, - "position": { - "x": -236.32096982616602, - "y": -32.33828000921607 + "targetHandle": { + "fieldName": "url", + "id": "AzureDocumentIntelligence-cekA1", + "inputTypes": [ + "str", + "Data", + "Message", + "list" + ], + "type": "other" + } }, + "id": "xy-edge__BlobStorage-sNo3r{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-sNo3rœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}-AzureDocumentIntelligence-cekA1{œfieldNameœ:œurlœ,œidœ:œAzureDocumentIntelligence-cekA1œ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}", "selected": false, - "type": "genericNode" - }, + "source": "BlobStorage-sNo3r", + "sourceHandle": "{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-sNo3rœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}", + "target": "AzureDocumentIntelligence-cekA1", + "targetHandle": "{œfieldNameœ:œurlœ,œidœ:œAzureDocumentIntelligence-cekA1œ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}" + } + ], + "nodes": [ { "data": { - "id": "azure_ocr-EZE9i", + "id": "AzureDocumentIntelligence-cekA1", "node": { "base_classes": [ - "Data" + "DataFrame" ], "beta": false, "category": "models", "conditional_paths": [], "custom_fields": {}, - "description": "Process documents using Azure Form Recognizer OCR capabilities", - "display_name": "Form Recognizer", - "documentation": "", + "description": "Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis", + "display_name": "Azure Document Intelligence", + "documentation": "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/", "edited": false, "field_order": [ "url", @@ -315,9 +179,9 @@ ], "frozen": false, "icon": "Azure", - "key": "azure_ocr", + "key": "AzureDocumentIntelligence", "legacy": false, - "lf_version": "1.4.3", + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], @@ -326,18 +190,20 @@ "allows_loop": false, "cache": true, "display_name": "Structured Data", + "group_outputs": false, "method": "load_files", "name": "structured_data", - "selected": "Data", + "selected": "DataFrame", "tool_mode": true, "types": [ - "Data" + "DataFrame" ], "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.007568328950209746, + "priority": 3, + "score": 0.10489765225226892, "template": { "_type": "Component", "code": { @@ -356,7 +222,7 @@ "show": true, "title_case": false, "type": "code", - "value": "\"\"\"Form Recognizer Component for processing and analyzing form data.\"\"\"\n\nimport asyncio\nimport concurrent.futures\nimport mimetypes\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom urllib.parse import unquote, urlparse\n\nimport aiohttp\nimport requests\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output\nfrom langflow.schema import Data\nfrom loguru import logger\n\n\nclass FormRecognizerComponent(BaseFileComponent):\n \"\"\"Component for recognizing and processing form data.\"\"\"\n\n display_name = \"Form Recognizer\"\n description = \"Process documents using Azure Form Recognizer OCR capabilities\"\n icon = \"Azure\"\n name = \"azure_ocr\"\n # legacy = True\n\n VALID_EXTENSIONS = [\"pdf\", \"jpg\", \"jpeg\", \"png\", \"bmp\", \"tiff\", \"tif\"]\n\n inputs = [\n HandleInput(\n name=\"url\",\n display_name=\"URL\",\n info=\"URL to the document to process\",\n input_types=[\"str\", \"Data\", \"Message\", \"list\"],\n required=False,\n ),\n # Include only the HandleInput and BoolInputs from base_inputs\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n DropdownInput(\n name=\"model_type\",\n display_name=\"Model Type\",\n options=[\"prebuilt-document\", \"prebuilt-read\", \"prebuilt-layout\"],\n value=\"prebuilt-document\",\n info=\"Choose the Form Recognizer model to use\",\n ),\n BoolInput(\n name=\"extract_tables\",\n display_name=\"Extract Tables\",\n value=True,\n info=\"Extract and format tables from the document\",\n ),\n BoolInput(\n name=\"include_confidence\",\n display_name=\"Include Confidence Scores\",\n value=False,\n advanced=True,\n info=\"Include confidence scores in the extracted text\",\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"Use Concurrent Processing\",\n value=True,\n info=\"Enable concurrent processing of multiple files\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"Number of files to process concurrently\",\n value=2,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Structured Data\", name=\"structured_data\", method=\"load_files\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n self._text_content = \"\"\n\n\n def get_text_content(self) -> str:\n \"\"\"Return the concatenated text content from all processed pages.\"\"\"\n return self._text_content\n\n def _extract_filename_from_url(self, url: str) -> str:\n \"\"\"Extract filename from URL or generate a default one.\"\"\"\n try:\n logger.debug(f\"Extracting filename from URL: {url}\")\n parsed_url = urlparse(url)\n path = unquote(parsed_url.path)\n filename = os.path.basename(path)\n\n if filename and \".\" in filename:\n logger.debug(f\"Found filename in URL path: {filename}\")\n return filename\n\n response = requests.head(url, allow_redirects=True)\n if \"content-disposition\" in response.headers:\n content_disp = response.headers[\"content-disposition\"]\n if \"filename=\" in content_disp:\n filename = content_disp.split(\"filename=\")[1].strip(\"\\\"'\")\n logger.debug(f\"Found filename in content-disposition: {filename}\")\n return filename\n\n if \"content-type\" in response.headers:\n ext = mimetypes.guess_extension(response.headers[\"content-type\"])\n if ext:\n filename = f\"downloaded{ext}\"\n logger.debug(f\"Generated filename from content-type: {filename}\")\n return filename\n\n logger.debug(\"Using default filename: downloaded.pdf\")\n return \"downloaded.pdf\"\n except Exception as e:\n logger.error(f\"Error extracting filename from URL: {e!s}\")\n return \"downloaded.pdf\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n logger.debug(f\"Attempting to download file from URL: {url}\")\n filename = self._extract_filename_from_url(url)\n local_path = os.path.join(self.temp_dir, filename)\n logger.debug(f\"Local path for download: {local_path}\")\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n def _extract_url_from_input(self, input_data) -> str | None:\n \"\"\"Extract URL string from various input types.\"\"\"\n logger.debug(f\"Extracting URL from input data type: {type(input_data)}\")\n\n # Handle list of Data objects (from blob storage)\n if isinstance(input_data, list):\n logger.debug(f\"Processing list input with {len(input_data)} items\")\n if input_data and isinstance(input_data[0], Data):\n url = input_data[0].data.get(\"file_path\")\n logger.debug(f\"Extracted URL from first Data object in list: {url}\")\n return url\n return None\n\n if isinstance(input_data, str):\n logger.debug(f\"Input is string: {input_data}\")\n return input_data\n elif isinstance(input_data, Data):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from Data object: {url}\")\n return url\n elif hasattr(input_data, \"text\"):\n logger.debug(f\"Extracted URL from text attribute: {input_data.text}\")\n return input_data.text\n elif hasattr(input_data, \"data\"):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from data attribute: {url}\")\n return url\n logger.debug(\"No URL found in input data\")\n return None\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths.\"\"\"\n resolved_files = []\n logger.debug(\"Starting path validation and resolution\")\n\n # Handle URL input if provided\n if hasattr(self, \"url\") and self.url:\n try:\n logger.debug(f\"Processing URL input: {self.url}\")\n # Extract URL from different input types\n url = self._extract_url_from_input(self.url)\n if not url:\n logger.warning(\"No valid URL found in input\")\n return resolved_files\n\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(url)\n )\n finally:\n loop.close()\n\n if local_path:\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": url,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n except Exception as e:\n logger.error(f\"Error processing URL {url}: {e!s}\")\n if not self.silent_errors:\n raise\n\n # Handle file_path input\n file_path = self._file_path_as_list()\n logger.debug(f\"Processing file_path input: {file_path}\")\n for obj in file_path:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n logger.debug(f\"Processing server file path: {server_file_path}\")\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Check if it's a URL\n if isinstance(server_file_path, str) and server_file_path.startswith(\n (\"http://\", \"https://\")\n ):\n logger.debug(f\"Processing URL from file_path: {server_file_path}\")\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(server_file_path)\n )\n finally:\n loop.close()\n\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": server_file_path,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(server_file_path)))\n logger.debug(f\"Resolved local file path: {resolved_path}\")\n if not resolved_path.exists():\n msg = f\"File not found: {server_file_path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {server_file_path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n logger.debug(f\"Resolved {len(resolved_files)} files\")\n return resolved_files\n\n async def process_file(\n self, file_path: str, *, silent_errors: bool = False\n ) -> tuple[Data, str]:\n \"\"\"Process a single file using the OCR service.\"\"\"\n try:\n from langflow.services.manager import service_manager\n\n ocr_service = service_manager.get(\"ocr_service\")\n\n with open(file_path, \"rb\") as file:\n file_content = file.read()\n\n extracted_content, plain_text = await ocr_service.process_document(\n file_content=file_content,\n model_type=self.model_type,\n include_confidence=self.include_confidence,\n extract_tables=self.extract_tables,\n )\n\n structured_data = Data(\n text=plain_text,\n data={\n self.SERVER_FILE_PATH_FIELDNAME: str(file_path),\n \"result\": extracted_content,\n },\n )\n\n return structured_data, plain_text\n\n except Exception as e:\n logger.error(f\"Error processing file {file_path}: {e!s}\")\n if not silent_errors:\n raise\n return None, \"\"\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process multiple files with concurrent processing.\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n concurrency = (\n 1\n if not self.use_multithreading\n else max(1, self.concurrency_multithreading)\n )\n file_count = len(file_list)\n\n logger.info(f\"Processing {file_count} files with concurrency: {concurrency}\")\n\n all_plain_text = []\n processed_data = []\n\n if concurrency > 1 and file_count > 1:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n with concurrent.futures.ThreadPoolExecutor(\n max_workers=concurrency\n ) as executor:\n future_to_file = {\n executor.submit(\n lambda path: loop.run_until_complete(\n self.process_file(\n str(path), silent_errors=self.silent_errors\n )\n ),\n file.path,\n ): file\n for file in file_list\n }\n for future in concurrent.futures.as_completed(future_to_file):\n try:\n structured_data, plain_text = future.result()\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error in concurrent processing: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n else:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n for file in file_list:\n try:\n structured_data, plain_text = loop.run_until_complete(\n self.process_file(\n str(file.path), silent_errors=self.silent_errors\n )\n )\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n\n # Store concatenated text content\n self._text_content = \"\\n\\n=== NEW DOCUMENT ===\\n\\n\".join(all_plain_text)\n\n return self.rollup_data(file_list, processed_data)\n\n def __del__(self):\n \"\"\"Cleanup temporary files and directory.\"\"\"\n try:\n if hasattr(self, \"temp_dir\") and os.path.exists(self.temp_dir):\n # Remove downloaded files\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.unlink(file_path)\n # Remove the temporary directory\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" + "value": "\"\"\"Azure Document Intelligence Component - Form recognition and document processing.\"\"\"\n\nimport asyncio\nimport concurrent.futures\nimport mimetypes\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom urllib.parse import unquote, urlparse\n\nimport aiohttp\nimport requests\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output\nfrom langflow.schema.data import Data\nfrom loguru import logger\n\n\nclass AzureDocumentIntelligenceComponent(BaseFileComponent):\n \"\"\"Component for Azure Document Intelligence - advanced document processing and form recognition.\"\"\"\n\n display_name: str = \"Azure Document Intelligence\"\n description: str = \"Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis\"\n documentation: str = \"https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/\"\n icon: str = \"Azure\"\n name: str = \"AzureDocumentIntelligence\"\n category: str = \"models\"\n priority: int = 3 # High priority for document processing\n\n VALID_EXTENSIONS = [\"pdf\", \"jpg\", \"jpeg\", \"png\", \"bmp\", \"tiff\", \"tif\"]\n\n inputs = [\n HandleInput(\n name=\"url\",\n display_name=\"URL\",\n info=\"URL to the document to process\",\n input_types=[\"str\", \"Data\", \"Message\", \"list\"],\n required=False,\n ),\n # Include only the HandleInput and BoolInputs from base_inputs\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n DropdownInput(\n name=\"model_type\",\n display_name=\"Model Type\",\n options=[\"prebuilt-document\", \"prebuilt-read\", \"prebuilt-layout\"],\n value=\"prebuilt-document\",\n info=\"Choose the Form Recognizer model to use\",\n ),\n BoolInput(\n name=\"extract_tables\",\n display_name=\"Extract Tables\",\n value=True,\n info=\"Extract and format tables from the document\",\n ),\n BoolInput(\n name=\"include_confidence\",\n display_name=\"Include Confidence Scores\",\n value=False,\n advanced=True,\n info=\"Include confidence scores in the extracted text\",\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"Use Concurrent Processing\",\n value=True,\n info=\"Enable concurrent processing of multiple files\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"Number of files to process concurrently\",\n value=2,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Structured Data\", name=\"structured_data\", method=\"load_files\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n self._text_content = \"\"\n\n def get_text_content(self) -> str:\n \"\"\"Return the concatenated text content from all processed pages.\"\"\"\n return self._text_content\n\n def _extract_filename_from_url(self, url: str) -> str:\n \"\"\"Extract filename from URL or generate a default one.\"\"\"\n try:\n logger.debug(f\"Extracting filename from URL: {url}\")\n parsed_url = urlparse(url)\n path = unquote(parsed_url.path)\n filename = os.path.basename(path)\n\n if filename and \".\" in filename:\n logger.debug(f\"Found filename in URL path: {filename}\")\n return filename\n\n response = requests.head(url, allow_redirects=True)\n if \"content-disposition\" in response.headers:\n content_disp = response.headers[\"content-disposition\"]\n if \"filename=\" in content_disp:\n filename = content_disp.split(\"filename=\")[1].strip(\"\\\"'\")\n logger.debug(f\"Found filename in content-disposition: {filename}\")\n return filename\n\n if \"content-type\" in response.headers:\n ext = mimetypes.guess_extension(response.headers[\"content-type\"])\n if ext:\n filename = f\"downloaded{ext}\"\n logger.debug(f\"Generated filename from content-type: {filename}\")\n return filename\n\n logger.debug(\"Using default filename: downloaded.pdf\")\n return \"downloaded.pdf\"\n except Exception as e:\n logger.error(f\"Error extracting filename from URL: {e!s}\")\n return \"downloaded.pdf\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n logger.debug(f\"Attempting to download file from URL: {url}\")\n filename = self._extract_filename_from_url(url)\n local_path = os.path.join(self.temp_dir, filename)\n logger.debug(f\"Local path for download: {local_path}\")\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n def _extract_url_from_input(self, input_data) -> str | None:\n \"\"\"Extract URL string from various input types.\"\"\"\n logger.debug(f\"Extracting URL from input data type: {type(input_data)}\")\n\n # Handle list of Data objects (from blob storage)\n if isinstance(input_data, list):\n logger.debug(f\"Processing list input with {len(input_data)} items\")\n if input_data and isinstance(input_data[0], Data):\n url = input_data[0].data.get(\"file_path\")\n logger.debug(f\"Extracted URL from first Data object in list: {url}\")\n return url\n return None\n\n if isinstance(input_data, str):\n logger.debug(f\"Input is string: {input_data}\")\n return input_data\n elif isinstance(input_data, Data):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from Data object: {url}\")\n return url\n elif hasattr(input_data, \"text\"):\n logger.debug(f\"Extracted URL from text attribute: {input_data.text}\")\n return input_data.text\n elif hasattr(input_data, \"data\"):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from data attribute: {url}\")\n return url\n logger.debug(\"No URL found in input data\")\n return None\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths.\"\"\"\n resolved_files = []\n logger.debug(\"Starting path validation and resolution\")\n\n # Handle URL input if provided\n if hasattr(self, \"url\") and self.url:\n try:\n logger.debug(f\"Processing URL input: {self.url}\")\n # Extract URL from different input types\n url = self._extract_url_from_input(self.url)\n if not url:\n logger.warning(\"No valid URL found in input\")\n return resolved_files\n\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(url)\n )\n finally:\n loop.close()\n\n if local_path:\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": url,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n except Exception as e:\n logger.error(f\"Error processing URL {url}: {e!s}\")\n if not self.silent_errors:\n raise\n\n # Handle file_path input\n file_path = self._file_path_as_list()\n logger.debug(f\"Processing file_path input: {file_path}\")\n for obj in file_path:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n logger.debug(f\"Processing server file path: {server_file_path}\")\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Check if it's a URL\n if isinstance(server_file_path, str) and server_file_path.startswith(\n (\"http://\", \"https://\")\n ):\n logger.debug(f\"Processing URL from file_path: {server_file_path}\")\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(server_file_path)\n )\n finally:\n loop.close()\n\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": server_file_path,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(server_file_path)))\n logger.debug(f\"Resolved local file path: {resolved_path}\")\n if not resolved_path.exists():\n msg = f\"File not found: {server_file_path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {server_file_path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n logger.debug(f\"Resolved {len(resolved_files)} files\")\n return resolved_files\n\n async def process_file(\n self, file_path: str, *, silent_errors: bool = False\n ) -> tuple[Data, str]:\n \"\"\"Process a single file using the OCR service.\"\"\"\n try:\n from langflow.services.deps import get_document_intelligence_service\n \n # Create OCR service directly\n ocr_service = get_document_intelligence_service()\n\n with open(file_path, \"rb\") as file:\n file_content = file.read()\n\n extracted_content, plain_text = await ocr_service.process_document(\n file_content=file_content,\n model_type=self.model_type,\n include_confidence=self.include_confidence,\n extract_tables=self.extract_tables,\n )\n\n structured_data = Data(\n text=plain_text,\n data={\n self.SERVER_FILE_PATH_FIELDNAME: str(file_path),\n \"result\": extracted_content,\n },\n )\n\n return structured_data, plain_text\n\n except Exception as e:\n logger.error(f\"Error processing file {file_path}: {e!s}\")\n if not silent_errors:\n raise\n return None, \"\"\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process multiple files with concurrent processing.\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n concurrency = (\n 1\n if not self.use_multithreading\n else max(1, self.concurrency_multithreading)\n )\n file_count = len(file_list)\n\n logger.info(f\"Processing {file_count} files with concurrency: {concurrency}\")\n\n all_plain_text = []\n processed_data = []\n\n if concurrency > 1 and file_count > 1:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n with concurrent.futures.ThreadPoolExecutor(\n max_workers=concurrency\n ) as executor:\n future_to_file = {\n executor.submit(\n lambda path: loop.run_until_complete(\n self.process_file(\n str(path), silent_errors=self.silent_errors\n )\n ),\n file.path,\n ): file\n for file in file_list\n }\n for future in concurrent.futures.as_completed(future_to_file):\n try:\n structured_data, plain_text = future.result()\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error in concurrent processing: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n else:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n for file in file_list:\n try:\n structured_data, plain_text = loop.run_until_complete(\n self.process_file(\n str(file.path), silent_errors=self.silent_errors\n )\n )\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n\n # Store concatenated text content\n self._text_content = \"\\n\\n=== NEW DOCUMENT ===\\n\\n\".join(all_plain_text)\n\n return self.rollup_data(file_list, processed_data)\n\n def __del__(self):\n \"\"\"Cleanup temporary files and directory.\"\"\"\n try:\n if hasattr(self, \"temp_dir\") and os.path.exists(self.temp_dir):\n # Remove downloaded files\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.unlink(file_path)\n # Remove the temporary directory\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" }, "concurrency_multithreading": { "_input_type": "IntInput", @@ -494,6 +360,7 @@ "dialog_inputs": {}, "display_name": "Model Type", "dynamic": false, + "external_options": {}, "info": "Choose the Form Recognizer model to use", "name": "model_type", "options": [ @@ -575,85 +442,68 @@ "tool_mode": false }, "showNode": true, - "type": "azure_ocr" + "type": "AzureDocumentIntelligence" }, "dragging": false, - "id": "azure_ocr-EZE9i", + "id": "AzureDocumentIntelligence-cekA1", "measured": { - "height": 393, + "height": 365, "width": 320 }, "position": { - "x": 241.2997512963966, - "y": -15.67709206308016 + "x": 450.7437593294486, + "y": 352.95126272844857 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "ParseData-ABZoG", + "id": "ParserComponent-VotrW", "node": { "base_classes": [ - "Data", - "Dict", "Message" ], "beta": false, - "category": "utils", + "category": "processing", "conditional_paths": [], "custom_fields": {}, - "description": "Convert Data objects into Messages using any {field_name} from input data.", - "display_name": "Parse Data", - "documentation": "", + "description": "Extracts text using a template.", + "display_name": "Parser", + "documentation": "https://docs.langflow.org/components-processing#parser", "edited": false, "field_order": [ - "data", - "template", + "input_data", + "mode", + "pattern", "sep" ], "frozen": false, - "icon": "message-square", - "key": "ParseData", - "legacy": true, - "lf_version": "1.4.3", - "metadata": { - "legacy_name": "Parse Data" - }, + "icon": "braces", + "key": "ParserComponent", + "legacy": false, + "lf_version": "1.6.3", + "metadata": {}, "minimized": false, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Message", + "display_name": "Parsed Text", "group_outputs": false, - "method": "parse_data", - "name": "text", + "method": "parse_combined_text", + "name": "parsed_text", "selected": "Message", "tool_mode": true, "types": [ "Message" ], "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "Data List", - "group_outputs": false, - "method": "parse_data_as_list", - "name": "data_list", - "selected": "Dict", - "tool_mode": true, - "types": [ - "Data" - ], - "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.28173906304863156, + "score": 0.001, "template": { "_type": "Component", "code": { @@ -672,56 +522,57 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import data_to_text, data_to_text_list\nfrom langflow.io import DataInput, MultilineInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.message import Message\n\n\nclass ParseDataComponent(Component):\n display_name = \"Data to Message\"\n description = \"Convert Data objects into Messages using any {field_name} from input data.\"\n icon = \"message-square\"\n name = \"ParseData\"\n legacy = True\n replacement = [\"processing.DataOperations\", \"processing.TypeConverterComponent\"]\n metadata = {\n \"legacy_name\": \"Parse Data\",\n }\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to text.\",\n is_list=True,\n required=True,\n ),\n MultilineInput(\n name=\"template\",\n display_name=\"Template\",\n info=\"The template to use for formatting the data. \"\n \"It can contain the keys {text}, {data} or any other key in the Data.\",\n value=\"{text}\",\n required=True,\n ),\n StrInput(name=\"sep\", display_name=\"Separator\", advanced=True, value=\"\\n\"),\n ]\n\n outputs = [\n Output(\n display_name=\"Message\",\n name=\"text\",\n info=\"Data as a single Message, with each input Data separated by Separator\",\n method=\"parse_data\",\n ),\n Output(\n display_name=\"Data List\",\n name=\"data_list\",\n info=\"Data as a list of new Data, each having `text` formatted by Template\",\n method=\"parse_data_as_list\",\n ),\n ]\n\n def _clean_args(self) -> tuple[list[Data], str, str]:\n data = self.data if isinstance(self.data, list) else [self.data]\n template = self.template\n sep = self.sep\n return data, template, sep\n\n def parse_data(self) -> Message:\n data, template, sep = self._clean_args()\n result_string = data_to_text(template, data, sep)\n self.status = result_string\n return Message(text=result_string)\n\n def parse_data_as_list(self) -> list[Data]:\n data, template, _ = self._clean_args()\n text_list, data_list = data_to_text_list(template, data)\n for item, text in zip(data_list, text_list, strict=True):\n item.set_text(text)\n self.status = data_list\n return data_list\n" + "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/components-processing#parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n formatted_text = self.pattern.format(**data.data)\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n" }, - "data": { - "_input_type": "DataInput", + "input_data": { + "_input_type": "HandleInput", "advanced": false, - "display_name": "Data", + "display_name": "Data or DataFrame", "dynamic": false, - "info": "The data to convert to text.", + "info": "Accepts either a DataFrame or a Data object.", "input_types": [ + "DataFrame", "Data" ], - "list": true, + "list": false, "list_add_label": "Add More", - "name": "data", + "name": "input_data", "placeholder": "", "required": true, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, "type": "other", "value": "" }, - "sep": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Separator", + "mode": { + "_input_type": "TabInput", + "advanced": false, + "display_name": "Mode", "dynamic": false, - "info": "", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "sep", + "info": "Convert into raw string instead of using a template.", + "name": "mode", + "options": [ + "Parser", + "Stringify" + ], "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "\n" + "type": "tab", + "value": "Parser" }, - "template": { + "pattern": { "_input_type": "MultilineInput", "advanced": false, "copy_field": false, "display_name": "Template", - "dynamic": false, - "info": "The template to use for formatting the data. It can contain the keys {text}, {data} or any other key in the Data.", + "dynamic": true, + "info": "Use variables within curly brackets to extract column values for DataFrames or key values for Data.For example: `Name: {Name}, Age: {Age}, Country: {Country}`", "input_types": [ "Message" ], @@ -729,7 +580,7 @@ "list_add_label": "Add More", "load_from_db": false, "multiline": true, - "name": "template", + "name": "pattern", "placeholder": "", "required": true, "show": true, @@ -739,69 +590,887 @@ "trace_as_metadata": true, "type": "str", "value": "{text}" + }, + "sep": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Separator", + "dynamic": false, + "info": "String used to separate rows/items.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "sep", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "\n" } }, "tool_mode": false }, "showNode": true, - "type": "ParseData" + "type": "ParserComponent" }, "dragging": false, - "id": "ParseData-ABZoG", + "id": "ParserComponent-VotrW", "measured": { - "height": 397, + "height": 329, "width": 320 }, "position": { - "x": 924.4084570879688, - "y": 23.199013144570245 + "x": 814.96875, + "y": 435 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "ClinicalLLM-fOoUf", + "id": "Agent-CAhED", "node": { "base_classes": [ - "Data" + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Define the agent's instructions, then enter a task to complete using tools.", + "display_name": "Agent", + "documentation": "https://docs.langflow.org/agents", + "edited": false, + "field_order": [ + "agent_llm", + "max_tokens", + "model_kwargs", + "model_name", + "openai_api_base", + "api_key", + "temperature", + "seed", + "max_retries", + "timeout", + "system_prompt", + "n_messages", + "format_instructions", + "output_schema", + "tools", + "input_value", + "handle_parsing_errors", + "verbose", + "max_iterations", + "agent_description", + "add_current_date_tool" + ], + "frozen": false, + "icon": "bot", + "last_updated": "2025-10-29T10:53:22.379Z", + "legacy": false, + "lf_version": "1.6.3", + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Response", + "group_outputs": false, + "method": "message_response", + "name": "response", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "add_current_date_tool": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Current Date", + "dynamic": false, + "info": "If true, will add a tool to the agent that returns the current date.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "add_current_date_tool", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "agent_description": { + "_input_type": "MultilineInput", + "advanced": true, + "copy_field": false, + "display_name": "Agent Description [Deprecated]", + "dynamic": false, + "info": "The description of the agent. This is only used when in Tool Mode. Defaults to 'A helpful assistant with access to the following tools:' and tools are added dynamically. This feature is deprecated and will be removed in future versions.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "agent_description", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "A helpful assistant with access to the following tools:" + }, + "agent_llm": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model Provider", + "dynamic": false, + "external_options": { + "fields": { + "data": { + "node": { + "display_name": "Connect other models", + "icon": "CornerDownLeft", + "name": "connect_other_models" + } + } + } + }, + "info": "The provider of the language model that the agent will use to generate responses.", + "input_types": [], + "name": "agent_llm", + "options": [ + "Anthropic", + "Google Generative AI", + "OpenAI", + "Azure OpenAI" + ], + "options_metadata": [ + { + "icon": "Anthropic" + }, + { + "icon": "GoogleGenerativeAI" + }, + { + "icon": "OpenAI" + }, + { + "icon": "Azure" + }, + { + "icon": "brain" + } + ], + "placeholder": "", + "real_time_refresh": true, + "refresh_button": false, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Azure OpenAI" + }, + "api_key": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Azure Chat OpenAI API Key", + "dynamic": false, + "info": "", + "input_types": [], + "load_from_db": true, + "name": "api_key", + "password": true, + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "" + }, + "api_version": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "API Version", + "dynamic": false, + "external_options": {}, + "info": "", + "input_types": [], + "name": "api_version", + "options": [ + "2025-02-01-preview", + "2025-01-01-preview", + "2024-12-01-preview", + "2024-10-01-preview", + "2024-09-01-preview", + "2024-08-01-preview", + "2024-07-01-preview", + "2024-06-01", + "2024-03-01-preview", + "2024-02-15-preview", + "2023-12-01-preview", + "2023-05-15" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "2025-01-01-preview" + }, + "azure_deployment": { + "_input_type": "MessageTextInput", + "advanced": false, + "display_name": "Deployment Name", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "azure_deployment", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "gpt-4o" + }, + "azure_endpoint": { + "_input_type": "MessageTextInput", + "advanced": false, + "display_name": "Azure Endpoint", + "dynamic": false, + "info": "Your Azure endpoint, including the resource. Example: `https://example-resource.azure.openai.com/`", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": true, + "name": "azure_endpoint", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "azure_endpoint_gpt-4o" + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "input_types": [], + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import (\n ToolCallingAgentComponent,\n)\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n IntInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\nfrom langflow.custom.default_providers import apply_provider_defaults\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"OpenAI\", \"Azure OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n if \"OpenAI\" in MODEL_PROVIDERS_DICT:\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n else:\n openai_inputs_filtered = []\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST],\n value=\"OpenAI\",\n real_time_refresh=True,\n refresh_button=False,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST if key in MODELS_METADATA]\n + [{\"icon\": \"brain\"}],\n external_options={\n \"fields\": {\n \"data\": {\n \"node\": {\n \"name\": \"connect_other_models\",\n \"display_name\": \"Connect other models\",\n \"icon\": \"CornerDownLeft\",\n }\n }\n },\n },\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\n \"true\",\n \"1\",\n \"t\",\n \"y\",\n \"yes\",\n ]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (\n ExceptionWithMessageError,\n ValueError,\n TypeError,\n RuntimeError,\n ) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (\n ExceptionWithMessageError,\n ValueError,\n TypeError,\n NotImplementedError,\n AttributeError,\n ) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(\n session_id=self.graph.session_id,\n order=\"Ascending\",\n n_messages=self.n_messages,\n )\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n \n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n build_config.update(fields_to_add)\n \n # Apply provider-specific defaults (only for Azure OpenAI currently)\n if field_value == \"Azure OpenAI\":\n build_config = apply_provider_defaults(field_value, build_config)\n \n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n \n elif field_value == \"connect_other_models\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST],\n real_time_refresh=True,\n refresh_button=False,\n input_types=[\"LanguageModel\"],\n placeholder=\"Awaiting model input.\",\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST if key in MODELS_METADATA],\n external_options={\n \"fields\": {\n \"data\": {\n \"node\": {\n \"name\": \"connect_other_models\",\n \"display_name\": \"Connect other models\",\n \"icon\": \"CornerDownLeft\",\n },\n }\n },\n },\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n \n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n \n # Rest of your existing method remains unchanged...\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\",\n tool_description=description,\n callbacks=self.get_langchain_callbacks(),\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n" + }, + "format_instructions": { + "_input_type": "MultilineInput", + "advanced": true, + "copy_field": false, + "display_name": "Output Format Instructions", + "dynamic": false, + "info": "Generic Template for structured output formatting. Valid only with Structured response.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "format_instructions", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "You are an AI that extracts structured JSON objects from unstructured text. Use a predefined schema with expected types (str, int, float, bool, dict). Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. Fill missing or ambiguous values with defaults: null for missing values. Remove exact duplicates but keep variations that have different field values. Always return valid JSON in the expected format, never throw errors. If multiple objects can be extracted, return them all in the structured format." + }, + "handle_parsing_errors": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Handle Parse Errors", + "dynamic": false, + "info": "Should the Agent fix errors when reading user input for better processing?", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "handle_parsing_errors", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "input_value": { + "_input_type": "MessageInput", + "advanced": false, + "display_name": "Input", + "dynamic": false, + "info": "The input provided by the user for the agent to process.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "input_value", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": true, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "max_iterations": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Iterations", + "dynamic": false, + "info": "The maximum number of attempts the agent can make to complete its task before it stops.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "max_iterations", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 15 + }, + "max_tokens": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Tokens", + "dynamic": false, + "info": "The maximum number of tokens to generate. Set to 0 for unlimited tokens.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "max_tokens", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 0 + }, + "n_messages": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Number of Chat History Messages", + "dynamic": false, + "info": "Number of chat history messages to retrieve.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "n_messages", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 100 + }, + "output_schema": { + "_input_type": "TableInput", + "advanced": true, + "display_name": "Output Schema", + "dynamic": false, + "info": "Schema Validation: Define the structure and data types for structured output. No validation if no output schema.", + "input_types": [], + "is_list": true, + "list_add_label": "Add More", + "name": "output_schema", + "placeholder": "", + "required": false, + "show": true, + "table_icon": "Table", + "table_schema": { + "columns": [ + { + "default": "field", + "description": "Specify the name of the output field.", + "disable_edit": false, + "display_name": "Name", + "edit_mode": "inline", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "name", + "sortable": true, + "type": "str" + }, + { + "default": "description of field", + "description": "Describe the purpose of the output field.", + "disable_edit": false, + "display_name": "Description", + "edit_mode": "popover", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "description", + "sortable": true, + "type": "str" + }, + { + "default": "str", + "description": "Indicate the data type of the output field (e.g., str, int, float, bool, dict).", + "disable_edit": false, + "display_name": "Type", + "edit_mode": "inline", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "type", + "options": [ + "str", + "int", + "float", + "bool", + "dict" + ], + "sortable": true, + "type": "str" + }, + { + "default": false, + "description": "Set to True if this output field should be a list of the specified type.", + "disable_edit": false, + "display_name": "As List", + "edit_mode": "inline", + "filterable": true, + "formatter": "boolean", + "hidden": false, + "name": "multiple", + "sortable": true, + "type": "boolean" + } + ] + }, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "trigger_icon": "Table", + "trigger_text": "Open table", + "type": "table", + "value": [] + }, + "system_prompt": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Agent Instructions", + "dynamic": false, + "info": "System Prompt: Initial instructions and context provided to guide the agent's behavior.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "system_prompt", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "You are a clinical data specialist. Extract clinical entities from medical documents.\n\n## TASK\n\nWhen you receive medical text:\n1. Call the clinical entity extraction tool with the raw text\n2. Report all entities found in the prediction array\n\n## OUTPUT FORMAT\n\n**Clinical Entities Extracted:**\n- Total entities: [count]\n\n**By Category:**\n- MEDICAL_CONDITION: [list]\n- MEDICATION: [list]\n- TEST_TREATMENT_PROCEDURE: [list]\n- ANATOMY: [list]\n- PROTECTED_HEALTH_INFORMATION: [list]\n\n## RULES\n\n1. ALWAYS call the entity extraction tool\n2. DO NOT skip the tool call\n3. Report exactly what the tool returns\n" + }, + "temperature": { + "_input_type": "SliderInput", + "advanced": true, + "display_name": "Temperature", + "dynamic": false, + "info": "Controls randomness. Lower values are more deterministic, higher values are more creative.", + "input_types": [], + "max_label": "", + "max_label_icon": "", + "min_label": "", + "min_label_icon": "", + "name": "temperature", + "placeholder": "", + "range_spec": { + "max": 2, + "min": 0, + "step": 0.01, + "step_type": "float" + }, + "required": false, + "show": true, + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "title_case": false, + "tool_mode": false, + "type": "slider", + "value": 0.08 + }, + "tools": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Tools", + "dynamic": false, + "info": "These are the tools that the agent can use to help with tasks.", + "input_types": [ + "Tool" + ], + "list": true, + "list_add_label": "Add More", + "name": "tools", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "verbose": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Verbose", + "dynamic": false, + "info": "", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "verbose", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "Agent" + }, + "dragging": false, + "id": "Agent-CAhED", + "measured": { + "height": 759, + "width": 320 + }, + "position": { + "x": 1357.30738870804, + "y": 115.62969730913994 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "AutonomizeModel-Tr1Ab", + "node": { + "base_classes": [ + "Data" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Unified interface for Autonomize text-based AI models with dropdown selection", + "display_name": "Autonomize Model", + "documentation": "https://docs.example.com/autonomize-models", + "edited": false, + "field_order": [ + "selected_model", + "search_query" + ], + "frozen": false, + "icon": "Autonomize", + "last_updated": "2025-10-29T10:53:21.381Z", + "legacy": false, + "lf_version": "1.6.3", + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Toolset", + "group_outputs": false, + "hidden": null, + "method": "to_toolkit", + "name": "component_as_tool", + "options": null, + "required_inputs": null, + "selected": "Tool", + "tool_mode": true, + "types": [ + "Tool" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "priority": 1, + "template": { + "_type": "Component", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "\"\"\"Autonomize Model Component - Unified text-based model component with dropdown selection.\"\"\"\n\nimport ast\nimport json\nfrom typing import Any\n\nfrom langflow.services.modelhub.model_endpoint import ModelEndpoint\nfrom loguru import logger\n\nfrom langflow.base.modelhub import ATModelComponent\nfrom langflow.inputs.inputs import FieldTypes\nfrom langflow.io import DropdownInput, MultilineInput, Output\nfrom langflow.schema.data import Data\n\n\nclass AutonomizeModelComponent(ATModelComponent):\n \"\"\"Unified component for Autonomize text-based models with dropdown selection.\"\"\"\n\n display_name: str = \"Autonomize Model\"\n description: str = \"Unified interface for Autonomize text-based AI models with dropdown selection\"\n documentation: str = \"https://docs.example.com/autonomize-models\"\n icon: str = \"Autonomize\"\n name: str = \"AutonomizeModel\"\n category: str = \"models\"\n priority: int = 1 # High priority to appear near top\n\n # Model mapping for dropdown options\n MODEL_OPTIONS = {\n \"Clinical LLM\": ModelEndpoint.CLINICAL_LLM,\n \"Clinical Note Classifier\": ModelEndpoint.CLINICAL_NOTE_CLASSIFIER,\n \"Combined Entity Linking\": ModelEndpoint.COMBINED_ENTITY_LINKING,\n \"CPT Code\": ModelEndpoint.CPT_CODE,\n \"ICD-10 Code\": ModelEndpoint.ICD_10,\n \"RxNorm Code\": ModelEndpoint.RXNORM,\n \"Short Summary\": ModelEndpoint.SHORT_SUMMARY,\n \"Detailed Summary\": ModelEndpoint.DETAILED_SUMMARY,\n \"Page Level Classifier\": ModelEndpoint.PAGE_LEVEL_CLASSIFIER\n }\n\n # Model descriptions for UI\n MODEL_DESCRIPTIONS = {\n \"Clinical LLM\": \"Extract clinical entities from medical text\",\n \"Clinical Note Classifier\": \"Classify clinical notes by type\",\n \"Combined Entity Linking\": \"Link extracted entities to standard vocabularies\",\n \"CPT Code\": \"Extract CPT codes from medical text\",\n \"ICD-10 Code\": \"Extract ICD-10 codes from medical text\",\n \"RxNorm Code\": \"Extract RxNorm codes for medications\",\n \"Short Summary\": \"short summary\",\n \"Detailed Summary\": \"detailed summary\",\n \"Page Level Classifier\": \"page type classification\"\n }\n\n inputs = [\n DropdownInput(\n name=\"selected_model\",\n display_name=\"Model\",\n options=list(MODEL_OPTIONS.keys()),\n value=next(iter(MODEL_OPTIONS.keys())),\n info=\"Select the Autonomize model to use\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"search_query\",\n display_name=\"Text Input\",\n field_type=FieldTypes.TEXT,\n multiline=True,\n tool_mode=True,\n info=\"Input text to process with the selected model\",\n ),\n ]\n\n outputs = [\n Output(\n name=\"prediction\",\n display_name=\"Model Output\",\n method=\"build_output\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._current_model_endpoint = None\n # Initialize _model_name with the default model endpoint (required by ATModelComponent)\n self._model_name = self.MODEL_OPTIONS[next(iter(self.MODEL_OPTIONS.keys()))]\n\n @property\n def model_endpoint(self) -> ModelEndpoint:\n \"\"\"Get the current model endpoint based on selection.\"\"\"\n return self.MODEL_OPTIONS[self.selected_model]\n\n @property\n def model_name_from_endpoint(self) -> str:\n \"\"\"Get the model name from the ModelEndpoint.\"\"\"\n return self.model_endpoint.get_model()\n\n async def extract_entities(self, text: Any) -> dict:\n \"\"\"Extract entities using the selected model.\"\"\"\n # Handle different input formats\n if isinstance(text, str) and text.strip().startswith(\"{\"):\n try:\n text_dict = json.loads(text)\n text = text_dict\n except json.JSONDecodeError as e:\n logger.error(f\"Failed to parse JSON string: {e}\")\n # If JSON parsing fails, use the original text\n\n # Handle the case where input is a dictionary with result structure\n if isinstance(text, dict) and \"result\" in text:\n result = text[\"result\"]\n if isinstance(result, list) and len(result) > 0:\n # Extract text from the first result item\n first_result = result[0]\n if isinstance(first_result, dict) and \"text\" in first_result:\n extracted_text = first_result[\"text\"]\n text = extracted_text\n else:\n msg = \"First result item does not contain 'text' key\"\n raise ValueError(msg)\n else:\n msg = \"Result list is empty or not a list\"\n raise ValueError(msg)\n elif isinstance(text, dict) and \"text\" in text:\n text = text[\"text\"]\n elif hasattr(text, \"text\"):\n text = text.text\n\n try:\n # Use the standard predict method from ATModelComponent\n # Set the _model_name based on current selection\n self._model_name = self.model_endpoint\n\n response = await self.predict(text=text)\n\n # Handle string responses\n if isinstance(response, str):\n try:\n response = ast.literal_eval(response)\n except (ValueError, SyntaxError):\n # If it's not a valid Python literal, try JSON\n try:\n response = json.loads(response)\n except json.JSONDecodeError:\n # If neither works, wrap in a dict\n response = {\"result\": response}\n else:\n return response\n except Exception as e:\n msg = f\"Error processing with {self.model_name}: {e!s}\"\n logger.error(f\"API call failed: {msg}\")\n raise ValueError(msg) from e\n\n async def build_output(self) -> Data:\n \"\"\"Generate the output based on selected model.\"\"\"\n query_results = await self.extract_entities(self.search_query)\n\n # Create standardized output format\n output_data = {\n \"model\": self.selected_model,\n \"model_description\": self.MODEL_DESCRIPTIONS.get(self.selected_model, \"\"),\n \"data\": query_results\n }\n\n data = Data(value=output_data)\n self.status = f\"Processed with {self.selected_model}\"\n return data\n\n def build(self):\n \"\"\"Return the main build function for Langflow framework.\"\"\"\n return self.build_output\n" + }, + "search_query": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Text Input", + "dynamic": false, + "info": "Input text to process with the selected model", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "search_query", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": true, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "selected_model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model", + "dynamic": false, + "external_options": {}, + "info": "Select the Autonomize model to use", + "name": "selected_model", + "options": [ + "Clinical LLM", + "Clinical Note Classifier", + "Combined Entity Linking", + "CPT Code", + "ICD-10 Code", + "RxNorm Code", + "Short Summary", + "Detailed Summary", + "Page Level Classifier" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Clinical LLM" + }, + "tools_metadata": { + "_input_type": "ToolsInput", + "advanced": false, + "display_name": "Actions", + "dynamic": false, + "info": "Modify tool names and descriptions to help agents understand when to use each tool.", + "is_list": true, + "list_add_label": "Add More", + "name": "tools_metadata", + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "tools", + "value": [ + { + "args": { + "search_query": { + "default": "", + "description": "Input text to process with the selected model", + "title": "Search Query", + "type": "string" + } + }, + "description": "used to extract clinical entities from the text", + "display_description": "Unified interface for Autonomize text-based AI models with dropdown selection", + "display_name": "build_output", + "name": "clinical_entity_extractor", + "readonly": false, + "status": true, + "tags": [ + "build_output" + ] + } + ] + } + }, + "tool_mode": true + }, + "showNode": true, + "type": "AutonomizeModel" + }, + "dragging": false, + "id": "AutonomizeModel-Tr1Ab", + "measured": { + "height": 300, + "width": 320 + }, + "position": { + "x": 605.4284832186538, + "y": -9.620260907754396 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "ChatOutput-DEzJg", + "node": { + "base_classes": [ + "Message" ], "beta": false, - "category": "autonomize_models", + "category": "input_output", "conditional_paths": [], "custom_fields": {}, - "description": "Extract clinical entities from text using Clinical LLM.", - "display_name": "Clinical LLM", - "documentation": "https://docs.example.com/clinical-llm", + "description": "Display a chat message in the Playground.", + "display_name": "Chat Output", + "documentation": "https://docs.langflow.org/components-io#chat-output", "edited": false, "field_order": [ - "search_query" + "input_value", + "should_store_message", + "sender", + "sender_name", + "session_id", + "data_template" ], "frozen": false, - "icon": "Autonomize", - "key": "ClinicalLLM", + "icon": "MessagesSquare", + "key": "ChatOutput", "legacy": false, - "lf_version": "1.4.3", + "lf_version": "1.6.3", "metadata": {}, - "minimized": false, + "minimized": true, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Clinical Entities", - "method": "build_output", - "name": "prediction", - "selected": "Data", + "display_name": "Output Message", + "group_outputs": false, + "method": "message_response", + "name": "message", + "selected": "Message", "tool_mode": true, "types": [ - "Data" + "Message" ], "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.003095236623404538, + "score": 0.003169567463043492, "template": { "_type": "Component", "code": { @@ -820,23 +1489,114 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.inputs.input_mixin import FieldTypes\nfrom langflow.io import MultilineInput, Output\nfrom langflow.schema import Data\nfrom pydantic import BaseModel\nimport json\nimport logging\n\nfrom app.base.modelhub import ATModelComponent\nfrom app.services.modelhub.model_endpoint import ModelEndpoint\n\nlogger = logging.getLogger(__name__)\n\n\nclass Trait(BaseModel):\n Name: str\n Score: float\n\n\nclass Attribute(BaseModel):\n Id: int\n BeginOffset: int\n EndOffset: int\n Text: str\n Score: float\n Category: str\n Type: str\n Traits: list[Trait]\n\n\nclass Entity(BaseModel):\n Category: str\n Type: str\n Text: str\n BeginOffset: int\n EndOffset: int\n Score: float\n Traits: list[Trait]\n Id: int\n Attributes: list[Attribute] | None = None\n\n\nclass ClinicalPrediction(BaseModel):\n prediction: list[Entity]\n\n\nclass ClinicalLLMComponent(ATModelComponent):\n \"\"\"Component for the Clinical LLM model\"\"\"\n\n display_name: str = \"Clinical LLM\"\n description: str = \"Extract clinical entities from text using Clinical LLM.\"\n documentation: str = \"https://docs.example.com/clinical-llm\"\n icon: str = \"Autonomize\"\n name: str = \"ClinicalLLM\"\n _model_name = ModelEndpoint.CLINICAL_LLM\n\n inputs = [\n MultilineInput(\n name=\"search_query\",\n display_name=\"Search query\",\n field_type=FieldTypes.TEXT,\n multiline=True,\n )\n ]\n\n outputs = [\n Output(\n name=\"prediction\", display_name=\"Clinical Entities\", method=\"build_output\"\n ),\n ]\n\n async def extract_entities(self, text) -> ClinicalPrediction:\n \"\"\"Extract clinical entities from the input text\"\"\"\n # Handle the case where input is a JSON string\n if isinstance(text, str) and text.strip().startswith('{'):\n try:\n import json\n text_dict = json.loads(text)\n text = text_dict\n except json.JSONDecodeError as e:\n logger.error(f\"Failed to parse JSON string: {e}\")\n # If JSON parsing fails, use the original text\n pass\n \n # Handle the case where input is a dictionary with result structure\n if isinstance(text, dict) and \"result\" in text:\n result = text[\"result\"]\n if isinstance(result, list) and len(result) > 0:\n # Extract text from the first result item\n first_result = result[0]\n if isinstance(first_result, dict) and \"text\" in first_result:\n extracted_text = first_result[\"text\"]\n text = extracted_text\n else:\n raise ValueError(\"First result item does not contain 'text' key\")\n else:\n raise ValueError(\"Result list is empty or not a list\")\n elif isinstance(text, dict) and \"text\" in text:\n text = text[\"text\"]\n elif hasattr(text, \"text\"):\n text = text.text\n \n try:\n response = await self.predict(text=text)\n return ClinicalPrediction(**response)\n except Exception as e:\n msg = f\"Error extracting clinical entities: {e!s}\"\n logger.error(f\"API call failed: {msg}\")\n raise ValueError(msg) from e\n\n async def build_output(self) -> Data:\n \"\"\"Generate the output based on selected knowledgehub hubs.\"\"\"\n query_results = await self.extract_entities(self.search_query)\n data = Data(value={\"data\": query_results})\n self.status = data\n return data\n \n def build(self):\n \"\"\"Return the main build function for Langflow framework.\"\"\"\n return self.build_output\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom langflow.base.io.chat import ChatComponent\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.schema.properties import Source\nfrom langflow.template.field.base import Output\nfrom langflow.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, icon, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, - "search_query": { - "_input_type": "MultilineInput", + "data_template": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Data Template", + "dynamic": false, + "info": "Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "data_template", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "{text}" + }, + "input_value": { + "_input_type": "HandleInput", "advanced": false, - "copy_field": false, - "display_name": "Search query", + "display_name": "Inputs", "dynamic": false, - "info": "", + "info": "Message to be passed as output.", + "input_types": [ + "Data", + "DataFrame", + "Message" + ], + "list": false, + "list_add_label": "Add More", + "name": "input_value", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "sender": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Sender Type", + "dynamic": false, + "external_options": {}, + "info": "Type of sender.", + "name": "sender", + "options": [ + "Machine", + "User" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Machine" + }, + "sender_name": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Sender Name", + "dynamic": false, + "info": "Name of the sender.", "input_types": [ "Message" ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "multiline": true, - "name": "search_query", + "name": "sender_name", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "AI" + }, + "session_id": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Session ID", + "dynamic": false, + "info": "The session ID of the chat. If empty, the current session ID parameter will be used.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "session_id", "placeholder": "", "required": false, "show": true, @@ -846,50 +1606,71 @@ "trace_as_metadata": true, "type": "str", "value": "" + }, + "should_store_message": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Store Messages", + "dynamic": false, + "info": "Store the message in the history.", + "list": false, + "list_add_label": "Add More", + "name": "should_store_message", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true } }, "tool_mode": false }, - "showNode": true, - "type": "ClinicalLLM" + "showNode": false, + "type": "ChatOutput" }, "dragging": false, - "id": "ClinicalLLM-fOoUf", + "id": "ChatOutput-DEzJg", "measured": { - "height": 253, - "width": 320 + "height": 48, + "width": 192 }, "position": { - "x": 1511.252521412978, - "y": 17.64528382919157 + "x": 1719.2098458607636, + "y": 351.74900394220043 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "JSONOutput-b6rNP", + "id": "BlobStorage-sNo3r", "node": { "base_classes": [ - "Message" + "Data" ], "beta": false, - "category": "outputs", + "category": "input_output", "conditional_paths": [], "custom_fields": {}, - "description": "Display input data as JSON in the Playground.", - "display_name": "JSON Output", - "documentation": "", + "description": "Load files from Azure Blob Storage", + "display_name": "Blob Storage", + "documentation": "http://docs.langflow.org/components/storage", "edited": false, "field_order": [ - "data", - "pretty_print" + "storage_account", + "container_name", + "file_name", + "return_all_files" ], "frozen": false, - "icon": "Braces", - "key": "JSONOutput", + "icon": "Autonomize", + "key": "BlobStorage", + "last_updated": "2025-10-28T20:15:03.077Z", "legacy": false, - "lf_version": "1.4.3", + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], @@ -897,20 +1678,22 @@ { "allows_loop": false, "cache": true, - "display_name": "JSON", + "display_name": "File Path", "group_outputs": false, - "method": "json_response", - "name": "json", - "selected": "Message", + "method": "get_file_paths", + "name": "file_path", + "options": null, + "required_inputs": null, + "selected": "Data", "tool_mode": true, "types": [ - "Message" + "Data" ], "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.007568328950209746, + "score": 0.2945640631554785, "template": { "_type": "Component", "code": { @@ -929,39 +1712,266 @@ "show": true, "title_case": false, "type": "code", - "value": "import json\n\nfrom langflow.base.io.text import TextComponent\nfrom langflow.inputs import DataInput\nfrom langflow.io import BoolInput, Output\nfrom langflow.schema import Data\nfrom langflow.schema.message import Message\n\n\nclass JSONOutputComponent(TextComponent):\n display_name = \"JSON Output\"\n description = \"Display input data as JSON in the Playground.\"\n icon = \"Braces\"\n name = \"JSONOutput\"\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to JSON.\",\n is_list=True,\n ),\n BoolInput(\n name=\"pretty_print\",\n display_name=\"Pretty Print\",\n info=\"Format JSON with proper indentation\",\n value=True,\n advanced=True,\n ),\n ]\n outputs = [\n Output(display_name=\"JSON\", name=\"json\", method=\"json_response\"),\n ]\n\n def _process_data(self, data: Data | list[Data]) -> dict | list:\n \"\"\"Convert Data object(s) to dictionary/list format.\"\"\"\n if isinstance(data, list):\n return [item.dict() for item in data]\n return data.dict()\n\n def json_response(self) -> Message:\n try:\n # Process the Data input\n processed_data = self._process_data(self.data)\n\n # Convert to JSON string with optional pretty printing\n if self.pretty_print:\n formatted_json = json.dumps(\n processed_data, indent=2, ensure_ascii=False\n )\n else:\n formatted_json = json.dumps(processed_data, ensure_ascii=False)\n\n message = Message(text=formatted_json)\n self.status = formatted_json\n return message\n\n except Exception as e:\n error_message = f\"Error processing data to JSON: {e!s}\"\n message = Message(text=error_message)\n self.status = error_message\n return message\n" + "value": "\"\"\"Blob Storage Component for loading files from Azure Blob Storage.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.services.deps import get_flexstore_service\nfrom loguru import logger\n\n\nclass BlobStorageComponent(Component):\n display_name = \"Blob Storage\"\n category: str = \"input_output\"\n description = \"Load files from Azure Blob Storage\"\n documentation = \"http://docs.langflow.org/components/storage\"\n icon = \"Autonomize\"\n name = \"BlobStorage\"\n\n # Match the property name expected by FileComponent\n FILE_PATH_FIELD = \"file_path\"\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._container_list: list[str] = []\n self._file_list: list[str] = []\n\n inputs = [\n StrInput(\n name=\"storage_account\",\n display_name=\"Storage Account\",\n required=False,\n info=\"Storage Account name\",\n advanced=True,\n ),\n DropdownInput(\n name=\"container_name\",\n display_name=\"Container\",\n info=\"Select a container from the storage account\",\n required=True,\n refresh_button=True,\n ),\n DropdownInput(\n name=\"file_name\",\n display_name=\"File\",\n info=\"Select a file from the container\",\n required=True,\n refresh_button=True,\n ),\n BoolInput(\n name=\"return_all_files\",\n display_name=\"Return All Files\",\n info=\"If true and no specific file is selected, returns all files in the container\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"file_path\", # Match the property name expected by FileComponent\n display_name=\"File Path\",\n method=\"get_file_paths\",\n ),\n ]\n\n async def update_build_config(\n self, build_config: dict, field_value: Any, field_name: str | None = None\n ):\n \"\"\"Update the build configuration based on field changes.\"\"\"\n logger.info(f\"update_build_config called with field_name: {field_name}\")\n\n storage_account = getattr(self, \"storage_account\", None)\n container_name = getattr(self, \"container_name\", None)\n\n if field_name == \"container_name\":\n try:\n # Load the container options when the field is refreshed\n service = get_flexstore_service()\n self._container_list = await service.get_containers(storage_account)\n\n build_config[\"container_name\"][\"options\"] = self._container_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating container list: {e!s}\")\n raise\n\n elif field_name == \"file_name\" and container_name:\n try:\n # Load the file options when the field is refreshed\n service = get_flexstore_service()\n self._file_list = await service.get_files(\n storage_account, container_name\n )\n\n build_config[\"file_name\"][\"options\"] = self._file_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating file list: {e!s}\")\n raise\n\n return build_config\n\n async def get_file_paths(self) -> list[Data]:\n \"\"\"Get file paths for the FileComponent to process.\"\"\"\n try:\n if not self.container_name:\n logger.warning(\"Container name is required.\")\n return []\n\n service = get_flexstore_service()\n file_paths = []\n\n # If a specific file is selected\n if self.file_name:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, self.file_name\n )\n if signed_url:\n file_paths = [Data(data={self.FILE_PATH_FIELD: signed_url})]\n # If no specific file is selected and return_all_files is True\n elif self.return_all_files:\n files = await service.get_files(\n self.storage_account, self.container_name\n )\n for file in files:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, file\n )\n if signed_url:\n file_paths.append(Data(data={self.FILE_PATH_FIELD: signed_url}))\n\n if file_paths:\n self.status = file_paths\n logger.info(f\"Generated {len(file_paths)} file paths\")\n for path in file_paths:\n logger.debug(f\"File path: {path.data.get(self.FILE_PATH_FIELD)}\")\n else:\n logger.warning(\"No file paths generated\")\n\n return file_paths\n\n except Exception as e:\n logger.error(f\"Error in get_file_paths: {e!s}\")\n return []\n" + }, + "container_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Container", + "dynamic": false, + "external_options": {}, + "info": "Select a container from the storage account", + "name": "container_name", + "options": [ + "a-and-g-case-summary", + "a-and-g-case-summary-for-client", + "a-and-g-input", + "a-and-g-ocr-cache", + "a-and-g-ocr-post-processed-cache", + "a-and-g-page-images", + "a-and-g-reimagined", + "ai-studio-v2", + "aircare-dev-4002", + "aml-environment-image-build", + "ang-images", + "anywhere-200-files-input", + "anywhere-200-files-output", + "anywhere-input-docs-test", + "anywhere-output-docs-test", + "argo-flow-artifacts", + "atena", + "athena-data", + "autonomize-mlflow-artifacts", + "azure-webjobs-hosts", + "azure-webjobs-secrets", + "azureml", + "azureml-blobstore-3894b54e-0ee2-4e07-9b73-c3b30dc36b53", + "azureml-environments", + "azureml-metrics", + "backup-of-aws-instance", + "bcbs-ma-interqual-medical-policy", + "bcbs-medical-policy", + "benefit-accumulator-guidelines", + "benefit-check-eoc", + "benefit-eoc-guidelines", + "benefit-eoc-output", + "benefit-eoc-payload", + "bluecross-ca", + "carelon-guidelines", + "carelon-guidelines-v2", + "claims-docs", + "claims-qnext-response", + "cllm-v2-data", + "cms", + "correspondance-examples", + "datavant-storage-container", + "eoc-documents", + "etf-images", + "evicore-guidelines", + "fax-documents", + "fax-images", + "fax-images-2", + "fax-insights", + "fax-ocr-cache", + "fax-processor-validation-datasets", + "genesis-container", + "genesis-correspondence-automation-copilot", + "genesis-correspondence-bulk-ingestion-test", + "genesis-cph-demo-v2", + "genesis-dev-test-3012", + "genesis-dev-v2", + "genesis-platform-3010", + "genesis-platform-anywhere-prod", + "genesis-platform-cigna-dev", + "genesis-platform-demo", + "genesis-platform-demo-v2", + "genesis-platform-dev", + "genesis-platform-molina-uat", + "genesis-platform-qa", + "genesis-platform-v2-lab", + "hedis-page-images", + "hedis-page-ocr", + "indexbackup", + "insights-logs-auditevent", + "insights-metrics-pt1m", + "issue-test-1", + "job-test", + "k-hub-container", + "knowledgehubembeddings", + "load-testing-files-hedis", + "mail-images", + "mlflow", + "mlflow-dev-v2", + "mlflow-integration", + "mlflow-mssql", + "mlflow-qa", + "mlflowmssql", + "mlserver-artifacts", + "mlworkspace-backup", + "model-artifacts", + "modelcards", + "modelhub", + "modelhub-demo-v2", + "modelhub-v2-lab", + "models", + "molina-refactor-temporary", + "molina-refactor-test", + "mosaic-models", + "mosaic-provider-contracts", + "mosaic-provider-contracts-temp", + "mosaic-testing", + "ner-container", + "nestedcontainer", + "pcp-docs", + "pcp-extraction-docs", + "pcp-ocr-cache", + "pharmacy-auth", + "prior-auth", + "prior-authorization", + "projectx-files", + "projectx-temp-files", + "revisions", + "shahabas-mlflow-artifacts", + "snapshots", + "snapshotzips", + "spog-output", + "spog-qnext", + "string", + "temp-directory-genesis-studio", + "temp-studio", + "temp-studio-v2", + "temporal-poc", + "test-cms", + "test-storage-container", + "test1", + "tester-fax", + "umk2dev", + "weaviate-backups" + ], + "options_metadata": [], + "placeholder": "", + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "genesis-platform-demo" }, - "data": { - "_input_type": "DataInput", + "file_name": { + "_input_type": "DropdownInput", "advanced": false, - "display_name": "Data", + "combobox": false, + "dialog_inputs": {}, + "display_name": "File", "dynamic": false, - "info": "The data to convert to JSON.", - "input_types": [ - "Data" + "external_options": {}, + "info": "Select a file from the container", + "name": "file_name", + "options": [ + "01_CaseId_MS_001.pdf", + "01_case1_redacted.pdf", + "02_0case.pdf", + "02_KJJ copy 6.pdf", + "02_KJJ copy 7.pdf", + "1056_Subject (1).pdf", + "1900_PriorAuthSample.pdf", + "2001_Subject (1).pdf", + "2001_Subject-1.pdf", + "2001_Subject.pdf", + "28ee05bd-5a48-49c1-ae3b-f81c7d1523bftesttest123.pdf", + "400_PATIENT (1).pdf", + "400_PATIENT.pdf", + "5af97bb72e4e4375829efa0a8f2db776.pdf", + "66f949a74ecda992ccc91ade_66f957aa4ecda992ccc91c09_1.png", + "6_SampleAuthScan.pdf", + "913 Manual Pend Codes - All States Medicaid and Medicare - Job Aid (1).pdf", + "913 Manual Pend Codes - All States Medicaid and Medicare - Job Aid.pdf", + "99736_PermE8_Studies_ALL.pdf", + "99736_PermE8_Studies_ALL123.pdf", + "A Retrospective Study of Clinical Efficacy.pdf", + "AGMT - HSA - BCA of Detroit LLC dba BCA StoneCrest Center (1).pdf", + "Alta Bates 2.pdf", + "Authorization Business Process - All States and LOBs - SOP.pdf", + "BSS-CMS-1500-Fillable-2022-1 2.png", + "BSS-CMS-1500-Fillable-2022-1.png", + "BSS-CMS-1500-Fillable-2022.pdf", + "BSS-CMS-1500-Fillable.png", + "Banner_Square_Code.png", + "CMS 1500_SAMPLE_2024.png", + "CMS 1500_Sample.pdf", + "Case8_PAC.pdf", + "Clotrimazole.pdf", + "Cody Bradshaw Resume.pdf", + "Coordination of Benefits (COB) - All States Medicaid - SOP (1).pdf", + "Coordination of Benefits (COB) - All States Medicaid - SOP (3).pdf", + "Coordination of Benefits (COB) - All States Medicaid - SOP.pdf", + "Corrected Claims and Replacement Bills - Medicare All States- Processing Guideline (1).pdf", + "Corrected Claims and Replacement Bills - Medicare All States- Processing Guideline .pdf", + "Duplicate Claim Processing - Medicare All States - Processing Guideline (1).pdf", + "Formulation PDF example (from FMS System).pdf", + "Formulation attachments PDF2.pdf", + "Gabapentin.pdf", + "PCP 26 JD.pdf", + "PCP 34 JNP.pdf", + "Patient Information HC2 - CBP.pdf", + "Prior Auth Sample 1.pdf", + "PriorAuth000.pdf", + "PriorAuth000_1.pdf", + "PriorAuthSample1_2.pdf", + "PriorAuthSample2 (1).pdf", + "PriorAuthSample2 (2).pdf", + "PriorAuthSample2.pdf", + "PriorAuthSample3 copy.pdf", + "PriorAuthSample3.pdf", + "PriorAuthSample5.pdf", + "Residential Purchase Agreement.pdf", + "Screenshot 2024-10-09 at 13.26.14.png", + "Screenshot 2024-10-10 at 13.23.26.png", + "Screenshot 2024-10-10 at 15.19.00.png", + "Screenshot 2024-10-21 at 12.49.29 PM.png", + "Screenshot 2024-10-21 at 18.05.46.png", + "Standard_Mutual_NDA__2024_.docx.pdf", + "Test1tolstoy (1).pdf", + "UB04 SAMPLE .pdf", + "UB04 SAMPLE .png", + "ZIP5lyout.pdf", + "complaince_summary.pdf", + "discharge_summary.pdf", + "form-1a.pdf", + "only-toc.pdf", + "patient_12783127.pdf", + "patient_12783135-2.pdf", + "patient_summary.pdf", + "prior-auth-doc.pdf", + "source_1729618954942.pdf", + "test2tolstoy (1).pdf" ], - "list": true, - "list_add_label": "Add More", - "name": "data", + "options_metadata": [], "placeholder": "", - "required": false, + "refresh_button": true, + "required": true, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "other", - "value": "" + "type": "str", + "value": "discharge_summary.pdf" }, - "pretty_print": { + "return_all_files": { "_input_type": "BoolInput", - "advanced": true, - "display_name": "Pretty Print", + "advanced": false, + "display_name": "Return All Files", "dynamic": false, - "info": "Format JSON with proper indentation", + "info": "If true and no specific file is selected, returns all files in the container", "list": false, "list_add_label": "Add More", - "name": "pretty_print", + "name": "return_all_files", "placeholder": "", "required": false, "show": true, @@ -970,38 +1980,58 @@ "trace_as_metadata": true, "type": "bool", "value": true + }, + "storage_account": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Storage Account", + "dynamic": false, + "info": "Storage Account name", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "storage_account", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" } }, "tool_mode": false }, "showNode": true, - "type": "JSONOutput" + "type": "BlobStorage" }, "dragging": false, - "id": "JSONOutput-b6rNP", + "id": "BlobStorage-sNo3r", "measured": { - "height": 195, + "height": 329, "width": 320 }, "position": { - "x": 1933.3359493817545, - "y": -98.9830317937598 + "x": 106.8563794636986, + "y": -7.938728552541161 }, "selected": false, "type": "genericNode" } ], "viewport": { - "x": 156.13563493643096, - "y": 410.0199971524837, - "zoom": 0.5047026748113025 + "x": -7.502345250301744, + "y": 192.0929632011591, + "zoom": 0.4539021955799936 } }, - "description": "Navigate the Linguistic Landscape, Discover Opportunities.", + "description": "Integrates PDF reading with a language model to answer document-specific questions. Ideal for small-scale texts, it facilitates direct queries with immediate insights. ", "endpoint_name": null, + "id": "185e5e64-f33c-4532-8b65-25dc54c499c8", "is_component": false, - "last_tested_version": "1.4.3", - "name": "Clinical Entity Extraction", + "last_tested_version": "1.6.3", + "name": "Clinical Entities Extraction", "tags": [ "chart-review" ] diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Entity Normalization Agent.json b/src/backend/base/langflow/initial_setup/starter_projects/Entity Normalization Agent.json index 977e0df4646c..890875cafeb8 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Entity Normalization Agent.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Entity Normalization Agent.json @@ -6,347 +6,191 @@ "className": "", "data": { "sourceHandle": { - "dataType": "BlobStorage", - "id": "BlobStorage-3N6ar", - "name": "file_path", + "dataType": "ParserComponent", + "id": "ParserComponent-thGIC", + "name": "parsed_text", "output_types": [ - "Data" + "Message" ] }, "targetHandle": { - "fieldName": "url", - "id": "azure_ocr-RxVSX", + "fieldName": "input_value", + "id": "Agent-pqxdi", "inputTypes": [ - "str", - "Data", - "Message", - "list" + "Message" ], - "type": "other" + "type": "str" } }, - "id": "xy-edge__BlobStorage-3N6ar{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-3N6arœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}-azure_ocr-RxVSX{œfieldNameœ:œurlœ,œidœ:œazure_ocr-RxVSXœ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}", - "source": "BlobStorage-3N6ar", - "sourceHandle": "{œdataTypeœ: œBlobStorageœ, œidœ: œBlobStorage-3N6arœ, œnameœ: œfile_pathœ, œoutput_typesœ: [œDataœ]}", - "target": "azure_ocr-RxVSX", - "targetHandle": "{œfieldNameœ: œurlœ, œidœ: œazure_ocr-RxVSXœ, œinputTypesœ: [œstrœ, œDataœ, œMessageœ, œlistœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-ParserComponent-thGIC{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-thGICœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Agent-pqxdi{œfieldNameœ:œinput_valueœ,œidœ:œAgent-pqxdiœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "ParserComponent-thGIC", + "sourceHandle": "{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-thGICœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "target": "Agent-pqxdi", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œAgent-pqxdiœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "azure_ocr", - "id": "azure_ocr-RxVSX", - "name": "structured_data", + "dataType": "AutonomizeModel", + "id": "AutonomizeModel-RGBq4", + "name": "component_as_tool", "output_types": [ - "Data" + "Tool" ] }, "targetHandle": { - "fieldName": "data", - "id": "ParseData-4lFB9", + "fieldName": "tools", + "id": "Agent-pqxdi", "inputTypes": [ - "Data" + "Tool" ], "type": "other" } }, - "id": "xy-edge__azure_ocr-RxVSX{œdataTypeœ:œazure_ocrœ,œidœ:œazure_ocr-RxVSXœ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataœ]}-ParseData-4lFB9{œfieldNameœ:œdataœ,œidœ:œParseData-4lFB9œ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "azure_ocr-RxVSX", - "sourceHandle": "{œdataTypeœ: œazure_ocrœ, œidœ: œazure_ocr-RxVSXœ, œnameœ: œstructured_dataœ, œoutput_typesœ: [œDataœ]}", - "target": "ParseData-4lFB9", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-4lFB9œ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-AutonomizeModel-RGBq4{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-RGBq4œ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}-Agent-pqxdi{œfieldNameœ:œtoolsœ,œidœ:œAgent-pqxdiœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}", + "selected": false, + "source": "AutonomizeModel-RGBq4", + "sourceHandle": "{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-RGBq4œ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}", + "target": "Agent-pqxdi", + "targetHandle": "{œfieldNameœ:œtoolsœ,œidœ:œAgent-pqxdiœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "ParseData", - "id": "ParseData-4lFB9", - "name": "text", + "dataType": "Agent", + "id": "Agent-pqxdi", + "name": "response", "output_types": [ "Message" ] }, "targetHandle": { - "fieldName": "search_query", - "id": "ClinicalLLM-I88wH", + "fieldName": "input_value", + "id": "ChatOutput-3xSmo", "inputTypes": [ + "Data", + "DataFrame", "Message" ], - "type": "str" + "type": "other" } }, - "id": "xy-edge__ParseData-4lFB9{œdataTypeœ:œParseDataœ,œidœ:œParseData-4lFB9œ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-ClinicalLLM-I88wH{œfieldNameœ:œsearch_queryœ,œidœ:œClinicalLLM-I88wHœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", - "source": "ParseData-4lFB9", - "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-4lFB9œ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", - "target": "ClinicalLLM-I88wH", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œClinicalLLM-I88wHœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "id": "reactflow__edge-Agent-pqxdi{œdataTypeœ:œAgentœ,œidœ:œAgent-pqxdiœ,œnameœ:œresponseœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-3xSmo{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-3xSmoœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", + "selected": false, + "source": "Agent-pqxdi", + "sourceHandle": "{œdataTypeœ:œAgentœ,œidœ:œAgent-pqxdiœ,œnameœ:œresponseœ,œoutput_typesœ:[œMessageœ]}", + "target": "ChatOutput-3xSmo", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-3xSmoœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "ClinicalLLM", - "id": "ClinicalLLM-I88wH", - "name": "prediction", + "dataType": "AzureDocumentIntelligence", + "id": "AzureDocumentIntelligence-qxYVj", + "name": "structured_data", "output_types": [ - "Data" + "DataFrame" ] }, "targetHandle": { - "fieldName": "entities", - "id": "CombinedEntityLinking-vWvxa", + "fieldName": "input_data", + "id": "ParserComponent-thGIC", "inputTypes": [ + "DataFrame", "Data" ], "type": "other" } }, - "id": "xy-edge__ClinicalLLM-I88wH{œdataTypeœ:œClinicalLLMœ,œidœ:œClinicalLLM-I88wHœ,œnameœ:œpredictionœ,œoutput_typesœ:[œDataœ]}-CombinedEntityLinking-vWvxa{œfieldNameœ:œentitiesœ,œidœ:œCombinedEntityLinking-vWvxaœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "ClinicalLLM-I88wH", - "sourceHandle": "{œdataTypeœ: œClinicalLLMœ, œidœ: œClinicalLLM-I88wHœ, œnameœ: œpredictionœ, œoutput_typesœ: [œDataœ]}", - "target": "CombinedEntityLinking-vWvxa", - "targetHandle": "{œfieldNameœ: œentitiesœ, œidœ: œCombinedEntityLinking-vWvxaœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-AzureDocumentIntelligence-qxYVj{œdataTypeœ:œAzureDocumentIntelligenceœ,œidœ:œAzureDocumentIntelligence-qxYVjœ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataFrameœ]}-ParserComponent-thGIC{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-thGICœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "selected": false, + "source": "AzureDocumentIntelligence-qxYVj", + "sourceHandle": "{œdataTypeœ:œAzureDocumentIntelligenceœ,œidœ:œAzureDocumentIntelligence-qxYVjœ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataFrameœ]}", + "target": "ParserComponent-thGIC", + "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-thGICœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "CombinedEntityLinking", - "id": "CombinedEntityLinking-vWvxa", - "name": "prediction", + "dataType": "BlobStorage", + "id": "BlobStorage-GQsIW", + "name": "file_path", "output_types": [ "Data" ] }, "targetHandle": { - "fieldName": "data", - "id": "EntityNormalizationExtraction-VnusN", + "fieldName": "url", + "id": "AzureDocumentIntelligence-qxYVj", "inputTypes": [ - "Data" + "str", + "Data", + "Message", + "list" ], "type": "other" } }, - "id": "xy-edge__CombinedEntityLinking-vWvxa{œdataTypeœ:œCombinedEntityLinkingœ,œidœ:œCombinedEntityLinking-vWvxaœ,œnameœ:œpredictionœ,œoutput_typesœ:[œDataœ]}-EntityNormalizationExtraction-VnusN{œfieldNameœ:œdataœ,œidœ:œEntityNormalizationExtraction-VnusNœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "CombinedEntityLinking-vWvxa", - "sourceHandle": "{œdataTypeœ: œCombinedEntityLinkingœ, œidœ: œCombinedEntityLinking-vWvxaœ, œnameœ: œpredictionœ, œoutput_typesœ: [œDataœ]}", - "target": "EntityNormalizationExtraction-VnusN", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œEntityNormalizationExtraction-VnusNœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-BlobStorage-GQsIW{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-GQsIWœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}-AzureDocumentIntelligence-qxYVj{œfieldNameœ:œurlœ,œidœ:œAzureDocumentIntelligence-qxYVjœ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}", + "selected": false, + "source": "BlobStorage-GQsIW", + "sourceHandle": "{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-GQsIWœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}", + "target": "AzureDocumentIntelligence-qxYVj", + "targetHandle": "{œfieldNameœ:œurlœ,œidœ:œAzureDocumentIntelligence-qxYVjœ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}" }, { + "animated": false, + "className": "", "data": { "sourceHandle": { - "dataType": "EntityNormalizationExtraction", - "id": "EntityNormalizationExtraction-VnusN", - "name": "filtered_entities", + "dataType": "AutonomizeModel", + "id": "AutonomizeModel-VUyU1", + "name": "component_as_tool", "output_types": [ - "Data" + "Tool" ] }, "targetHandle": { - "fieldName": "data", - "id": "JSONOutput-5QXSP", + "fieldName": "tools", + "id": "Agent-pqxdi", "inputTypes": [ - "Data" + "Tool" ], "type": "other" } }, - "id": "xy-edge__EntityNormalizationExtraction-VnusN{œdataTypeœ:œEntityNormalizationExtractionœ,œidœ:œEntityNormalizationExtraction-VnusNœ,œnameœ:œfiltered_entitiesœ,œoutput_typesœ:[œDataœ]}-JSONOutput-5QXSP{œfieldNameœ:œdataœ,œidœ:œJSONOutput-5QXSPœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "EntityNormalizationExtraction-VnusN", - "sourceHandle": "{œdataTypeœ: œEntityNormalizationExtractionœ, œidœ: œEntityNormalizationExtraction-VnusNœ, œnameœ: œfiltered_entitiesœ, œoutput_typesœ: [œDataœ]}", - "target": "JSONOutput-5QXSP", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œJSONOutput-5QXSPœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "xy-edge__AutonomizeModel-VUyU1{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-VUyU1œ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}-Agent-pqxdi{œfieldNameœ:œtoolsœ,œidœ:œAgent-pqxdiœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}", + "selected": false, + "source": "AutonomizeModel-VUyU1", + "sourceHandle": "{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-VUyU1œ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}", + "target": "Agent-pqxdi", + "targetHandle": "{œfieldNameœ:œtoolsœ,œidœ:œAgent-pqxdiœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}" } ], "nodes": [ { "data": { - "id": "BlobStorage-3N6ar", - "node": { - "base_classes": [ - "Data" - ], - "beta": false, - "category": "inputs", - "conditional_paths": [], - "custom_fields": {}, - "description": "Load files from Azure Blob Storage", - "display_name": "Blob Storage", - "documentation": "http://docs.langflow.org/components/storage", - "edited": false, - "field_order": [ - "storage_account", - "container_name", - "file_name", - "return_all_files" - ], - "frozen": false, - "icon": "Autonomize", - "key": "BlobStorage", - "legacy": false, - "lf_version": "1.4.3", - "metadata": {}, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "File Path", - "group_outputs": false, - "method": "get_file_paths", - "name": "file_path", - "selected": "Data", - "tool_mode": true, - "types": [ - "Data" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "score": 0.007568328950209746, - "template": { - "_type": "Component", - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "\"\"\"Blob Storage Component for loading files from Azure Blob Storage.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.services.deps import get_flexstore_service\nfrom loguru import logger\n\n\nclass BlobStorageComponent(Component):\n display_name = \"Blob Storage\"\n category: str = \"input_output\"\n description = \"Load files from Azure Blob Storage\"\n documentation = \"http://docs.langflow.org/components/storage\"\n icon = \"Autonomize\"\n name = \"BlobStorage\"\n\n # Match the property name expected by FileComponent\n FILE_PATH_FIELD = \"file_path\"\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._container_list: list[str] = []\n self._file_list: list[str] = []\n\n inputs = [\n StrInput(\n name=\"storage_account\",\n display_name=\"Storage Account\",\n required=False,\n info=\"Storage Account name\",\n advanced=True,\n ),\n DropdownInput(\n name=\"container_name\",\n display_name=\"Container\",\n info=\"Select a container from the storage account\",\n required=True,\n refresh_button=True,\n ),\n DropdownInput(\n name=\"file_name\",\n display_name=\"File\",\n info=\"Select a file from the container\",\n required=True,\n refresh_button=True,\n ),\n BoolInput(\n name=\"return_all_files\",\n display_name=\"Return All Files\",\n info=\"If true and no specific file is selected, returns all files in the container\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"file_path\", # Match the property name expected by FileComponent\n display_name=\"File Path\",\n method=\"get_file_paths\",\n ),\n ]\n\n async def update_build_config(\n self, build_config: dict, field_value: Any, field_name: str | None = None\n ):\n \"\"\"Update the build configuration based on field changes.\"\"\"\n logger.info(f\"update_build_config called with field_name: {field_name}\")\n\n storage_account = getattr(self, \"storage_account\", None)\n container_name = getattr(self, \"container_name\", None)\n\n if field_name == \"container_name\":\n try:\n # Load the container options when the field is refreshed\n service = get_flexstore_service()\n self._container_list = await service.get_containers(storage_account)\n\n build_config[\"container_name\"][\"options\"] = self._container_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating container list: {e!s}\")\n raise\n\n elif field_name == \"file_name\" and container_name:\n try:\n # Load the file options when the field is refreshed\n service = get_flexstore_service()\n self._file_list = await service.get_files(\n storage_account, container_name\n )\n\n build_config[\"file_name\"][\"options\"] = self._file_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating file list: {e!s}\")\n raise\n\n return build_config\n\n async def get_file_paths(self) -> list[Data]:\n \"\"\"Get file paths for the FileComponent to process.\"\"\"\n try:\n if not self.container_name:\n logger.warning(\"Container name is required.\")\n return []\n\n service = get_flexstore_service()\n file_paths = []\n\n # If a specific file is selected\n if self.file_name:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, self.file_name\n )\n if signed_url:\n file_paths = [Data(data={self.FILE_PATH_FIELD: signed_url})]\n # If no specific file is selected and return_all_files is True\n elif self.return_all_files:\n files = await service.get_files(\n self.storage_account, self.container_name\n )\n for file in files:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, file\n )\n if signed_url:\n file_paths.append(Data(data={self.FILE_PATH_FIELD: signed_url}))\n\n if file_paths:\n self.status = file_paths\n logger.info(f\"Generated {len(file_paths)} file paths\")\n for path in file_paths:\n logger.debug(f\"File path: {path.data.get(self.FILE_PATH_FIELD)}\")\n else:\n logger.warning(\"No file paths generated\")\n\n return file_paths\n\n except Exception as e:\n logger.error(f\"Error in get_file_paths: {e!s}\")\n return []\n" - }, - "container_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Container", - "dynamic": false, - "info": "Select a container from the storage account", - "name": "container_name", - "options": [], - "options_metadata": [], - "placeholder": "", - "refresh_button": true, - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "genesis-container" - }, - "file_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "File", - "dynamic": false, - "info": "Select a file from the container", - "name": "file_name", - "options": [], - "options_metadata": [], - "placeholder": "", - "refresh_button": true, - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "0108_Subject199.pdf" - }, - "return_all_files": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Return All Files", - "dynamic": false, - "info": "If true and no specific file is selected, returns all files in the container", - "list": false, - "list_add_label": "Add More", - "name": "return_all_files", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "storage_account": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Storage Account", - "dynamic": false, - "info": "Storage Account name", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "storage_account", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - } - }, - "tool_mode": false - }, - "showNode": true, - "type": "BlobStorage" - }, - "dragging": false, - "id": "BlobStorage-3N6ar", - "measured": { - "height": 365, - "width": 320 - }, - "position": { - "x": 1001.3198785001694, - "y": 63.6941016858937 - }, - "selected": false, - "type": "genericNode" - }, - { - "data": { - "id": "azure_ocr-RxVSX", + "id": "AzureDocumentIntelligence-qxYVj", "node": { "base_classes": [ - "Data" + "DataFrame" ], "beta": false, "category": "models", "conditional_paths": [], "custom_fields": {}, - "description": "Process documents using Azure Form Recognizer OCR capabilities", - "display_name": "Form Recognizer", - "documentation": "", + "description": "Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis", + "display_name": "Azure Document Intelligence", + "documentation": "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/", "edited": false, "field_order": [ "url", @@ -363,9 +207,9 @@ ], "frozen": false, "icon": "Azure", - "key": "azure_ocr", + "key": "AzureDocumentIntelligence", "legacy": false, - "lf_version": "1.4.3", + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], @@ -374,18 +218,20 @@ "allows_loop": false, "cache": true, "display_name": "Structured Data", + "group_outputs": false, "method": "load_files", "name": "structured_data", - "selected": "Data", + "selected": "DataFrame", "tool_mode": true, "types": [ - "Data" + "DataFrame" ], "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.007568328950209746, + "priority": 3, + "score": 0.10489765225226892, "template": { "_type": "Component", "code": { @@ -404,7 +250,7 @@ "show": true, "title_case": false, "type": "code", - "value": "\"\"\"Form Recognizer Component for processing and analyzing form data.\"\"\"\n\nimport asyncio\nimport concurrent.futures\nimport mimetypes\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom urllib.parse import unquote, urlparse\n\nimport aiohttp\nimport requests\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output\nfrom langflow.schema import Data\nfrom loguru import logger\n\n\nclass FormRecognizerComponent(BaseFileComponent):\n \"\"\"Component for recognizing and processing form data.\"\"\"\n\n display_name = \"Form Recognizer\"\n description = \"Process documents using Azure Form Recognizer OCR capabilities\"\n icon = \"Azure\"\n name = \"azure_ocr\"\n # legacy = True\n\n VALID_EXTENSIONS = [\"pdf\", \"jpg\", \"jpeg\", \"png\", \"bmp\", \"tiff\", \"tif\"]\n\n inputs = [\n HandleInput(\n name=\"url\",\n display_name=\"URL\",\n info=\"URL to the document to process\",\n input_types=[\"str\", \"Data\", \"Message\", \"list\"],\n required=False,\n ),\n # Include only the HandleInput and BoolInputs from base_inputs\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n DropdownInput(\n name=\"model_type\",\n display_name=\"Model Type\",\n options=[\"prebuilt-document\", \"prebuilt-read\", \"prebuilt-layout\"],\n value=\"prebuilt-document\",\n info=\"Choose the Form Recognizer model to use\",\n ),\n BoolInput(\n name=\"extract_tables\",\n display_name=\"Extract Tables\",\n value=True,\n info=\"Extract and format tables from the document\",\n ),\n BoolInput(\n name=\"include_confidence\",\n display_name=\"Include Confidence Scores\",\n value=False,\n advanced=True,\n info=\"Include confidence scores in the extracted text\",\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"Use Concurrent Processing\",\n value=True,\n info=\"Enable concurrent processing of multiple files\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"Number of files to process concurrently\",\n value=2,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Structured Data\", name=\"structured_data\", method=\"load_files\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n self._text_content = \"\"\n\n\n def get_text_content(self) -> str:\n \"\"\"Return the concatenated text content from all processed pages.\"\"\"\n return self._text_content\n\n def _extract_filename_from_url(self, url: str) -> str:\n \"\"\"Extract filename from URL or generate a default one.\"\"\"\n try:\n logger.debug(f\"Extracting filename from URL: {url}\")\n parsed_url = urlparse(url)\n path = unquote(parsed_url.path)\n filename = os.path.basename(path)\n\n if filename and \".\" in filename:\n logger.debug(f\"Found filename in URL path: {filename}\")\n return filename\n\n response = requests.head(url, allow_redirects=True)\n if \"content-disposition\" in response.headers:\n content_disp = response.headers[\"content-disposition\"]\n if \"filename=\" in content_disp:\n filename = content_disp.split(\"filename=\")[1].strip(\"\\\"'\")\n logger.debug(f\"Found filename in content-disposition: {filename}\")\n return filename\n\n if \"content-type\" in response.headers:\n ext = mimetypes.guess_extension(response.headers[\"content-type\"])\n if ext:\n filename = f\"downloaded{ext}\"\n logger.debug(f\"Generated filename from content-type: {filename}\")\n return filename\n\n logger.debug(\"Using default filename: downloaded.pdf\")\n return \"downloaded.pdf\"\n except Exception as e:\n logger.error(f\"Error extracting filename from URL: {e!s}\")\n return \"downloaded.pdf\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n logger.debug(f\"Attempting to download file from URL: {url}\")\n filename = self._extract_filename_from_url(url)\n local_path = os.path.join(self.temp_dir, filename)\n logger.debug(f\"Local path for download: {local_path}\")\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n def _extract_url_from_input(self, input_data) -> str | None:\n \"\"\"Extract URL string from various input types.\"\"\"\n logger.debug(f\"Extracting URL from input data type: {type(input_data)}\")\n\n # Handle list of Data objects (from blob storage)\n if isinstance(input_data, list):\n logger.debug(f\"Processing list input with {len(input_data)} items\")\n if input_data and isinstance(input_data[0], Data):\n url = input_data[0].data.get(\"file_path\")\n logger.debug(f\"Extracted URL from first Data object in list: {url}\")\n return url\n return None\n\n if isinstance(input_data, str):\n logger.debug(f\"Input is string: {input_data}\")\n return input_data\n elif isinstance(input_data, Data):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from Data object: {url}\")\n return url\n elif hasattr(input_data, \"text\"):\n logger.debug(f\"Extracted URL from text attribute: {input_data.text}\")\n return input_data.text\n elif hasattr(input_data, \"data\"):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from data attribute: {url}\")\n return url\n logger.debug(\"No URL found in input data\")\n return None\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths.\"\"\"\n resolved_files = []\n logger.debug(\"Starting path validation and resolution\")\n\n # Handle URL input if provided\n if hasattr(self, \"url\") and self.url:\n try:\n logger.debug(f\"Processing URL input: {self.url}\")\n # Extract URL from different input types\n url = self._extract_url_from_input(self.url)\n if not url:\n logger.warning(\"No valid URL found in input\")\n return resolved_files\n\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(url)\n )\n finally:\n loop.close()\n\n if local_path:\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": url,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n except Exception as e:\n logger.error(f\"Error processing URL {url}: {e!s}\")\n if not self.silent_errors:\n raise\n\n # Handle file_path input\n file_path = self._file_path_as_list()\n logger.debug(f\"Processing file_path input: {file_path}\")\n for obj in file_path:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n logger.debug(f\"Processing server file path: {server_file_path}\")\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Check if it's a URL\n if isinstance(server_file_path, str) and server_file_path.startswith(\n (\"http://\", \"https://\")\n ):\n logger.debug(f\"Processing URL from file_path: {server_file_path}\")\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(server_file_path)\n )\n finally:\n loop.close()\n\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": server_file_path,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(server_file_path)))\n logger.debug(f\"Resolved local file path: {resolved_path}\")\n if not resolved_path.exists():\n msg = f\"File not found: {server_file_path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {server_file_path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n logger.debug(f\"Resolved {len(resolved_files)} files\")\n return resolved_files\n\n async def process_file(\n self, file_path: str, *, silent_errors: bool = False\n ) -> tuple[Data, str]:\n \"\"\"Process a single file using the OCR service.\"\"\"\n try:\n from langflow.services.manager import service_manager\n\n ocr_service = service_manager.get(\"ocr_service\")\n\n with open(file_path, \"rb\") as file:\n file_content = file.read()\n\n extracted_content, plain_text = await ocr_service.process_document(\n file_content=file_content,\n model_type=self.model_type,\n include_confidence=self.include_confidence,\n extract_tables=self.extract_tables,\n )\n\n structured_data = Data(\n text=plain_text,\n data={\n self.SERVER_FILE_PATH_FIELDNAME: str(file_path),\n \"result\": extracted_content,\n },\n )\n\n return structured_data, plain_text\n\n except Exception as e:\n logger.error(f\"Error processing file {file_path}: {e!s}\")\n if not silent_errors:\n raise\n return None, \"\"\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process multiple files with concurrent processing.\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n concurrency = (\n 1\n if not self.use_multithreading\n else max(1, self.concurrency_multithreading)\n )\n file_count = len(file_list)\n\n logger.info(f\"Processing {file_count} files with concurrency: {concurrency}\")\n\n all_plain_text = []\n processed_data = []\n\n if concurrency > 1 and file_count > 1:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n with concurrent.futures.ThreadPoolExecutor(\n max_workers=concurrency\n ) as executor:\n future_to_file = {\n executor.submit(\n lambda path: loop.run_until_complete(\n self.process_file(\n str(path), silent_errors=self.silent_errors\n )\n ),\n file.path,\n ): file\n for file in file_list\n }\n for future in concurrent.futures.as_completed(future_to_file):\n try:\n structured_data, plain_text = future.result()\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error in concurrent processing: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n else:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n for file in file_list:\n try:\n structured_data, plain_text = loop.run_until_complete(\n self.process_file(\n str(file.path), silent_errors=self.silent_errors\n )\n )\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n\n # Store concatenated text content\n self._text_content = \"\\n\\n=== NEW DOCUMENT ===\\n\\n\".join(all_plain_text)\n\n return self.rollup_data(file_list, processed_data)\n\n def __del__(self):\n \"\"\"Cleanup temporary files and directory.\"\"\"\n try:\n if hasattr(self, \"temp_dir\") and os.path.exists(self.temp_dir):\n # Remove downloaded files\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.unlink(file_path)\n # Remove the temporary directory\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" + "value": "\"\"\"Azure Document Intelligence Component - Form recognition and document processing.\"\"\"\n\nimport asyncio\nimport concurrent.futures\nimport mimetypes\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom urllib.parse import unquote, urlparse\n\nimport aiohttp\nimport requests\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output\nfrom langflow.schema.data import Data\nfrom loguru import logger\n\n\nclass AzureDocumentIntelligenceComponent(BaseFileComponent):\n \"\"\"Component for Azure Document Intelligence - advanced document processing and form recognition.\"\"\"\n\n display_name: str = \"Azure Document Intelligence\"\n description: str = \"Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis\"\n documentation: str = \"https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/\"\n icon: str = \"Azure\"\n name: str = \"AzureDocumentIntelligence\"\n category: str = \"models\"\n priority: int = 3 # High priority for document processing\n\n VALID_EXTENSIONS = [\"pdf\", \"jpg\", \"jpeg\", \"png\", \"bmp\", \"tiff\", \"tif\"]\n\n inputs = [\n HandleInput(\n name=\"url\",\n display_name=\"URL\",\n info=\"URL to the document to process\",\n input_types=[\"str\", \"Data\", \"Message\", \"list\"],\n required=False,\n ),\n # Include only the HandleInput and BoolInputs from base_inputs\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n DropdownInput(\n name=\"model_type\",\n display_name=\"Model Type\",\n options=[\"prebuilt-document\", \"prebuilt-read\", \"prebuilt-layout\"],\n value=\"prebuilt-document\",\n info=\"Choose the Form Recognizer model to use\",\n ),\n BoolInput(\n name=\"extract_tables\",\n display_name=\"Extract Tables\",\n value=True,\n info=\"Extract and format tables from the document\",\n ),\n BoolInput(\n name=\"include_confidence\",\n display_name=\"Include Confidence Scores\",\n value=False,\n advanced=True,\n info=\"Include confidence scores in the extracted text\",\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"Use Concurrent Processing\",\n value=True,\n info=\"Enable concurrent processing of multiple files\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"Number of files to process concurrently\",\n value=2,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Structured Data\", name=\"structured_data\", method=\"load_files\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n self._text_content = \"\"\n\n def get_text_content(self) -> str:\n \"\"\"Return the concatenated text content from all processed pages.\"\"\"\n return self._text_content\n\n def _extract_filename_from_url(self, url: str) -> str:\n \"\"\"Extract filename from URL or generate a default one.\"\"\"\n try:\n logger.debug(f\"Extracting filename from URL: {url}\")\n parsed_url = urlparse(url)\n path = unquote(parsed_url.path)\n filename = os.path.basename(path)\n\n if filename and \".\" in filename:\n logger.debug(f\"Found filename in URL path: {filename}\")\n return filename\n\n response = requests.head(url, allow_redirects=True)\n if \"content-disposition\" in response.headers:\n content_disp = response.headers[\"content-disposition\"]\n if \"filename=\" in content_disp:\n filename = content_disp.split(\"filename=\")[1].strip(\"\\\"'\")\n logger.debug(f\"Found filename in content-disposition: {filename}\")\n return filename\n\n if \"content-type\" in response.headers:\n ext = mimetypes.guess_extension(response.headers[\"content-type\"])\n if ext:\n filename = f\"downloaded{ext}\"\n logger.debug(f\"Generated filename from content-type: {filename}\")\n return filename\n\n logger.debug(\"Using default filename: downloaded.pdf\")\n return \"downloaded.pdf\"\n except Exception as e:\n logger.error(f\"Error extracting filename from URL: {e!s}\")\n return \"downloaded.pdf\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n logger.debug(f\"Attempting to download file from URL: {url}\")\n filename = self._extract_filename_from_url(url)\n local_path = os.path.join(self.temp_dir, filename)\n logger.debug(f\"Local path for download: {local_path}\")\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n def _extract_url_from_input(self, input_data) -> str | None:\n \"\"\"Extract URL string from various input types.\"\"\"\n logger.debug(f\"Extracting URL from input data type: {type(input_data)}\")\n\n # Handle list of Data objects (from blob storage)\n if isinstance(input_data, list):\n logger.debug(f\"Processing list input with {len(input_data)} items\")\n if input_data and isinstance(input_data[0], Data):\n url = input_data[0].data.get(\"file_path\")\n logger.debug(f\"Extracted URL from first Data object in list: {url}\")\n return url\n return None\n\n if isinstance(input_data, str):\n logger.debug(f\"Input is string: {input_data}\")\n return input_data\n elif isinstance(input_data, Data):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from Data object: {url}\")\n return url\n elif hasattr(input_data, \"text\"):\n logger.debug(f\"Extracted URL from text attribute: {input_data.text}\")\n return input_data.text\n elif hasattr(input_data, \"data\"):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from data attribute: {url}\")\n return url\n logger.debug(\"No URL found in input data\")\n return None\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths.\"\"\"\n resolved_files = []\n logger.debug(\"Starting path validation and resolution\")\n\n # Handle URL input if provided\n if hasattr(self, \"url\") and self.url:\n try:\n logger.debug(f\"Processing URL input: {self.url}\")\n # Extract URL from different input types\n url = self._extract_url_from_input(self.url)\n if not url:\n logger.warning(\"No valid URL found in input\")\n return resolved_files\n\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(url)\n )\n finally:\n loop.close()\n\n if local_path:\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": url,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n except Exception as e:\n logger.error(f\"Error processing URL {url}: {e!s}\")\n if not self.silent_errors:\n raise\n\n # Handle file_path input\n file_path = self._file_path_as_list()\n logger.debug(f\"Processing file_path input: {file_path}\")\n for obj in file_path:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n logger.debug(f\"Processing server file path: {server_file_path}\")\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Check if it's a URL\n if isinstance(server_file_path, str) and server_file_path.startswith(\n (\"http://\", \"https://\")\n ):\n logger.debug(f\"Processing URL from file_path: {server_file_path}\")\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(server_file_path)\n )\n finally:\n loop.close()\n\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": server_file_path,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(server_file_path)))\n logger.debug(f\"Resolved local file path: {resolved_path}\")\n if not resolved_path.exists():\n msg = f\"File not found: {server_file_path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {server_file_path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n logger.debug(f\"Resolved {len(resolved_files)} files\")\n return resolved_files\n\n async def process_file(\n self, file_path: str, *, silent_errors: bool = False\n ) -> tuple[Data, str]:\n \"\"\"Process a single file using the OCR service.\"\"\"\n try:\n from langflow.services.deps import get_document_intelligence_service\n \n # Create OCR service directly\n ocr_service = get_document_intelligence_service()\n\n with open(file_path, \"rb\") as file:\n file_content = file.read()\n\n extracted_content, plain_text = await ocr_service.process_document(\n file_content=file_content,\n model_type=self.model_type,\n include_confidence=self.include_confidence,\n extract_tables=self.extract_tables,\n )\n\n structured_data = Data(\n text=plain_text,\n data={\n self.SERVER_FILE_PATH_FIELDNAME: str(file_path),\n \"result\": extracted_content,\n },\n )\n\n return structured_data, plain_text\n\n except Exception as e:\n logger.error(f\"Error processing file {file_path}: {e!s}\")\n if not silent_errors:\n raise\n return None, \"\"\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process multiple files with concurrent processing.\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n concurrency = (\n 1\n if not self.use_multithreading\n else max(1, self.concurrency_multithreading)\n )\n file_count = len(file_list)\n\n logger.info(f\"Processing {file_count} files with concurrency: {concurrency}\")\n\n all_plain_text = []\n processed_data = []\n\n if concurrency > 1 and file_count > 1:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n with concurrent.futures.ThreadPoolExecutor(\n max_workers=concurrency\n ) as executor:\n future_to_file = {\n executor.submit(\n lambda path: loop.run_until_complete(\n self.process_file(\n str(path), silent_errors=self.silent_errors\n )\n ),\n file.path,\n ): file\n for file in file_list\n }\n for future in concurrent.futures.as_completed(future_to_file):\n try:\n structured_data, plain_text = future.result()\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error in concurrent processing: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n else:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n for file in file_list:\n try:\n structured_data, plain_text = loop.run_until_complete(\n self.process_file(\n str(file.path), silent_errors=self.silent_errors\n )\n )\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n\n # Store concatenated text content\n self._text_content = \"\\n\\n=== NEW DOCUMENT ===\\n\\n\".join(all_plain_text)\n\n return self.rollup_data(file_list, processed_data)\n\n def __del__(self):\n \"\"\"Cleanup temporary files and directory.\"\"\"\n try:\n if hasattr(self, \"temp_dir\") and os.path.exists(self.temp_dir):\n # Remove downloaded files\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.unlink(file_path)\n # Remove the temporary directory\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" }, "concurrency_multithreading": { "_input_type": "IntInput", @@ -542,6 +388,7 @@ "dialog_inputs": {}, "display_name": "Model Type", "dynamic": false, + "external_options": {}, "info": "Choose the Form Recognizer model to use", "name": "model_type", "options": [ @@ -623,84 +470,67 @@ "tool_mode": false }, "showNode": true, - "type": "azure_ocr" + "type": "AzureDocumentIntelligence" }, - "dragging": false, - "id": "azure_ocr-RxVSX", + "id": "AzureDocumentIntelligence-qxYVj", "measured": { - "height": 393, + "height": 365, "width": 320 }, "position": { - "x": 1314.8779509783526, - "y": 539.0347966661186 + "x": 1235.2467548657498, + "y": 581.571523636203 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "ParseData-4lFB9", + "id": "ParserComponent-thGIC", "node": { "base_classes": [ - "Dict", "Message" ], "beta": false, - "category": "utils", + "category": "processing", "conditional_paths": [], "custom_fields": {}, - "description": "Convert Data objects into Messages using any {field_name} from input data.", - "display_name": "Parse Data", - "documentation": "", + "description": "Extracts text using a template.", + "display_name": "Parser", + "documentation": "https://docs.langflow.org/components-processing#parser", "edited": false, "field_order": [ - "data", - "template", + "input_data", + "mode", + "pattern", "sep" ], "frozen": false, - "icon": "message-square", - "key": "ParseData", - "legacy": true, - "lf_version": "1.4.3", - "metadata": { - "legacy_name": "Parse Data" - }, + "icon": "braces", + "key": "ParserComponent", + "legacy": false, + "lf_version": "1.6.3", + "metadata": {}, "minimized": false, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Message", + "display_name": "Parsed Text", "group_outputs": false, - "method": "parse_data", - "name": "text", + "method": "parse_combined_text", + "name": "parsed_text", "selected": "Message", "tool_mode": true, "types": [ "Message" ], "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "Data List", - "group_outputs": false, - "method": "parse_data_as_list", - "name": "data_list", - "selected": "Dict", - "tool_mode": true, - "types": [ - "Data" - ], - "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.007568328950209746, + "score": 0.001, "template": { "_type": "Component", "code": { @@ -719,56 +549,57 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import data_to_text, data_to_text_list\nfrom langflow.io import DataInput, MultilineInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.message import Message\n\n\nclass ParseDataComponent(Component):\n display_name = \"Data to Message\"\n description = \"Convert Data objects into Messages using any {field_name} from input data.\"\n icon = \"message-square\"\n name = \"ParseData\"\n legacy = True\n replacement = [\"processing.DataOperations\", \"processing.TypeConverterComponent\"]\n metadata = {\n \"legacy_name\": \"Parse Data\",\n }\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to text.\",\n is_list=True,\n required=True,\n ),\n MultilineInput(\n name=\"template\",\n display_name=\"Template\",\n info=\"The template to use for formatting the data. \"\n \"It can contain the keys {text}, {data} or any other key in the Data.\",\n value=\"{text}\",\n required=True,\n ),\n StrInput(name=\"sep\", display_name=\"Separator\", advanced=True, value=\"\\n\"),\n ]\n\n outputs = [\n Output(\n display_name=\"Message\",\n name=\"text\",\n info=\"Data as a single Message, with each input Data separated by Separator\",\n method=\"parse_data\",\n ),\n Output(\n display_name=\"Data List\",\n name=\"data_list\",\n info=\"Data as a list of new Data, each having `text` formatted by Template\",\n method=\"parse_data_as_list\",\n ),\n ]\n\n def _clean_args(self) -> tuple[list[Data], str, str]:\n data = self.data if isinstance(self.data, list) else [self.data]\n template = self.template\n sep = self.sep\n return data, template, sep\n\n def parse_data(self) -> Message:\n data, template, sep = self._clean_args()\n result_string = data_to_text(template, data, sep)\n self.status = result_string\n return Message(text=result_string)\n\n def parse_data_as_list(self) -> list[Data]:\n data, template, _ = self._clean_args()\n text_list, data_list = data_to_text_list(template, data)\n for item, text in zip(data_list, text_list, strict=True):\n item.set_text(text)\n self.status = data_list\n return data_list\n" + "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/components-processing#parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n formatted_text = self.pattern.format(**data.data)\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n" }, - "data": { - "_input_type": "DataInput", + "input_data": { + "_input_type": "HandleInput", "advanced": false, - "display_name": "Data", + "display_name": "Data or DataFrame", "dynamic": false, - "info": "The data to convert to text.", + "info": "Accepts either a DataFrame or a Data object.", "input_types": [ + "DataFrame", "Data" ], - "list": true, + "list": false, "list_add_label": "Add More", - "name": "data", + "name": "input_data", "placeholder": "", "required": true, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, "type": "other", "value": "" }, - "sep": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Separator", + "mode": { + "_input_type": "TabInput", + "advanced": false, + "display_name": "Mode", "dynamic": false, - "info": "", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "sep", + "info": "Convert into raw string instead of using a template.", + "name": "mode", + "options": [ + "Parser", + "Stringify" + ], "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "\n" + "type": "tab", + "value": "Parser" }, - "template": { + "pattern": { "_input_type": "MultilineInput", "advanced": false, "copy_field": false, "display_name": "Template", - "dynamic": false, - "info": "The template to use for formatting the data. It can contain the keys {text}, {data} or any other key in the Data.", + "dynamic": true, + "info": "Use variables within curly brackets to extract column values for DataFrames or key values for Data.For example: `Name: {Name}, Age: {Age}, Country: {Country}`", "input_types": [ "Message" ], @@ -776,7 +607,7 @@ "list_add_label": "Add More", "load_from_db": false, "multiline": true, - "name": "template", + "name": "pattern", "placeholder": "", "required": true, "show": true, @@ -786,49 +617,676 @@ "trace_as_metadata": true, "type": "str", "value": "{text}" + }, + "sep": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Separator", + "dynamic": false, + "info": "String used to separate rows/items.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "sep", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "\n" } }, "tool_mode": false }, "showNode": true, - "type": "ParseData" + "type": "ParserComponent" }, - "dragging": false, - "id": "ParseData-4lFB9", + "id": "ParserComponent-thGIC", "measured": { - "height": 349, + "height": 329, "width": 320 }, "position": { - "x": 1720.1684382772812, - "y": 819.235627391304 + "x": 1599.4717455363013, + "y": 663.6202609077544 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "ClinicalLLM-I88wH", + "id": "Agent-pqxdi", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Define the agent's instructions, then enter a task to complete using tools.", + "display_name": "Agent", + "documentation": "https://docs.langflow.org/agents", + "edited": false, + "field_order": [ + "agent_llm", + "max_tokens", + "model_kwargs", + "model_name", + "openai_api_base", + "api_key", + "temperature", + "seed", + "max_retries", + "timeout", + "system_prompt", + "n_messages", + "format_instructions", + "output_schema", + "tools", + "input_value", + "handle_parsing_errors", + "verbose", + "max_iterations", + "agent_description", + "add_current_date_tool" + ], + "frozen": false, + "icon": "bot", + "last_updated": "2025-10-29T10:50:18.999Z", + "legacy": false, + "lf_version": "1.6.3", + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Response", + "group_outputs": false, + "method": "message_response", + "name": "response", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "add_current_date_tool": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Current Date", + "dynamic": false, + "info": "If true, will add a tool to the agent that returns the current date.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "add_current_date_tool", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "agent_description": { + "_input_type": "MultilineInput", + "advanced": true, + "copy_field": false, + "display_name": "Agent Description [Deprecated]", + "dynamic": false, + "info": "The description of the agent. This is only used when in Tool Mode. Defaults to 'A helpful assistant with access to the following tools:' and tools are added dynamically. This feature is deprecated and will be removed in future versions.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "agent_description", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "A helpful assistant with access to the following tools:" + }, + "agent_llm": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model Provider", + "dynamic": false, + "external_options": { + "fields": { + "data": { + "node": { + "display_name": "Connect other models", + "icon": "CornerDownLeft", + "name": "connect_other_models" + } + } + } + }, + "info": "The provider of the language model that the agent will use to generate responses.", + "input_types": [], + "name": "agent_llm", + "options": [ + "Anthropic", + "Google Generative AI", + "OpenAI", + "Azure OpenAI" + ], + "options_metadata": [ + { + "icon": "Anthropic" + }, + { + "icon": "GoogleGenerativeAI" + }, + { + "icon": "OpenAI" + }, + { + "icon": "Azure" + }, + { + "icon": "brain" + } + ], + "placeholder": "", + "real_time_refresh": true, + "refresh_button": false, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Azure OpenAI" + }, + "api_key": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Azure Chat OpenAI API Key", + "dynamic": false, + "info": "", + "input_types": [], + "load_from_db": true, + "name": "api_key", + "password": true, + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "" + }, + "api_version": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "API Version", + "dynamic": false, + "external_options": {}, + "info": "", + "input_types": [], + "name": "api_version", + "options": [ + "2025-02-01-preview", + "2025-01-01-preview", + "2024-12-01-preview", + "2024-10-01-preview", + "2024-09-01-preview", + "2024-08-01-preview", + "2024-07-01-preview", + "2024-06-01", + "2024-03-01-preview", + "2024-02-15-preview", + "2023-12-01-preview", + "2023-05-15" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "2025-01-01-preview" + }, + "azure_deployment": { + "_input_type": "MessageTextInput", + "advanced": false, + "display_name": "Deployment Name", + "dynamic": false, + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "azure_deployment", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "gpt-4o" + }, + "azure_endpoint": { + "_input_type": "MessageTextInput", + "advanced": false, + "display_name": "Azure Endpoint", + "dynamic": false, + "info": "Your Azure endpoint, including the resource. Example: `https://example-resource.azure.openai.com/`", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": true, + "name": "azure_endpoint", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "azure_endpoint_gpt-4o" + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "input_types": [], + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import (\n ToolCallingAgentComponent,\n)\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n IntInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\nfrom langflow.custom.default_providers import apply_provider_defaults\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"OpenAI\", \"Azure OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n if \"OpenAI\" in MODEL_PROVIDERS_DICT:\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n else:\n openai_inputs_filtered = []\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST],\n value=\"OpenAI\",\n real_time_refresh=True,\n refresh_button=False,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST if key in MODELS_METADATA]\n + [{\"icon\": \"brain\"}],\n external_options={\n \"fields\": {\n \"data\": {\n \"node\": {\n \"name\": \"connect_other_models\",\n \"display_name\": \"Connect other models\",\n \"icon\": \"CornerDownLeft\",\n }\n }\n },\n },\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\n \"true\",\n \"1\",\n \"t\",\n \"y\",\n \"yes\",\n ]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (\n ExceptionWithMessageError,\n ValueError,\n TypeError,\n RuntimeError,\n ) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (\n ExceptionWithMessageError,\n ValueError,\n TypeError,\n NotImplementedError,\n AttributeError,\n ) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(\n session_id=self.graph.session_id,\n order=\"Ascending\",\n n_messages=self.n_messages,\n )\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n \n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n build_config.update(fields_to_add)\n \n # Apply provider-specific defaults (only for Azure OpenAI currently)\n if field_value == \"Azure OpenAI\":\n build_config = apply_provider_defaults(field_value, build_config)\n \n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n \n elif field_value == \"connect_other_models\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST],\n real_time_refresh=True,\n refresh_button=False,\n input_types=[\"LanguageModel\"],\n placeholder=\"Awaiting model input.\",\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST if key in MODELS_METADATA],\n external_options={\n \"fields\": {\n \"data\": {\n \"node\": {\n \"name\": \"connect_other_models\",\n \"display_name\": \"Connect other models\",\n \"icon\": \"CornerDownLeft\",\n },\n }\n },\n },\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n \n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n \n # Rest of your existing method remains unchanged...\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\",\n tool_description=description,\n callbacks=self.get_langchain_callbacks(),\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n" + }, + "format_instructions": { + "_input_type": "MultilineInput", + "advanced": true, + "copy_field": false, + "display_name": "Output Format Instructions", + "dynamic": false, + "info": "Generic Template for structured output formatting. Valid only with Structured response.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "format_instructions", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "You are an AI that extracts structured JSON objects from unstructured text. Use a predefined schema with expected types (str, int, float, bool, dict). Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. Fill missing or ambiguous values with defaults: null for missing values. Remove exact duplicates but keep variations that have different field values. Always return valid JSON in the expected format, never throw errors. If multiple objects can be extracted, return them all in the structured format." + }, + "handle_parsing_errors": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Handle Parse Errors", + "dynamic": false, + "info": "Should the Agent fix errors when reading user input for better processing?", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "handle_parsing_errors", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "input_value": { + "_input_type": "MessageInput", + "advanced": false, + "display_name": "Input", + "dynamic": false, + "info": "The input provided by the user for the agent to process.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "input_value", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": true, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "max_iterations": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Iterations", + "dynamic": false, + "info": "The maximum number of attempts the agent can make to complete its task before it stops.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "max_iterations", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 15 + }, + "max_tokens": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Tokens", + "dynamic": false, + "info": "The maximum number of tokens to generate. Set to 0 for unlimited tokens.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "max_tokens", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 0 + }, + "n_messages": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Number of Chat History Messages", + "dynamic": false, + "info": "Number of chat history messages to retrieve.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "n_messages", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 100 + }, + "output_schema": { + "_input_type": "TableInput", + "advanced": true, + "display_name": "Output Schema", + "dynamic": false, + "info": "Schema Validation: Define the structure and data types for structured output. No validation if no output schema.", + "input_types": [], + "is_list": true, + "list_add_label": "Add More", + "name": "output_schema", + "placeholder": "", + "required": false, + "show": true, + "table_icon": "Table", + "table_schema": { + "columns": [ + { + "default": "field", + "description": "Specify the name of the output field.", + "disable_edit": false, + "display_name": "Name", + "edit_mode": "inline", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "name", + "sortable": true, + "type": "str" + }, + { + "default": "description of field", + "description": "Describe the purpose of the output field.", + "disable_edit": false, + "display_name": "Description", + "edit_mode": "popover", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "description", + "sortable": true, + "type": "str" + }, + { + "default": "str", + "description": "Indicate the data type of the output field (e.g., str, int, float, bool, dict).", + "disable_edit": false, + "display_name": "Type", + "edit_mode": "inline", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "type", + "options": [ + "str", + "int", + "float", + "bool", + "dict" + ], + "sortable": true, + "type": "str" + }, + { + "default": false, + "description": "Set to True if this output field should be a list of the specified type.", + "disable_edit": false, + "display_name": "As List", + "edit_mode": "inline", + "filterable": true, + "formatter": "boolean", + "hidden": false, + "name": "multiple", + "sortable": true, + "type": "boolean" + } + ] + }, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "trigger_icon": "Table", + "trigger_text": "Open table", + "type": "table", + "value": [] + }, + "system_prompt": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Agent Instructions", + "dynamic": false, + "info": "System Prompt: Initial instructions and context provided to guide the agent's behavior.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "system_prompt", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "You are a clinical data specialist. Extract and link clinical entities to medical codes.\n\n## TWO-STEP WORKFLOW\n\n### STEP 1: Extract Clinical Entities\n- Call the clinical entity extraction tool with the user's raw text\n- Receive JSON with extracted entities in the prediction array\n- Store this complete output for Step 2\n\n### STEP 2: Link Entities to Medical Codes\n- Pass the ENTIRE Step 1 output to the combined entity linking tool\n- Receive entities with linked medical codes (ICD-10, CPT, RxNorm)\n- Filter to keep ONLY entities that have at least one medical code\n\n## FILTERING RULE\n\nKeep entities where ANY of these exist:\n- `ICD10CMConcepts` (diagnoses/conditions)\n- `CPT_Current_Procedural_Terminology` (procedures)\n- `RxNormConcepts` (medications)\n```python\nfiltered = [\n e for e in entities \n if e.get(\"ICD10CMConcepts\") \n or e.get(\"CPT_Current_Procedural_Terminology\") \n or e.get(\"RxNormConcepts\")\n]\n```\n\n## EXECUTION SEQUENCE\n```\nUser text\n ↓\nStep 1: clinical_entity_extraction(text)\n ↓\nentities_json\n ↓\nStep 2: combined_entity_linking(data=entities_json)\n ↓\nlinked_entities\n ↓\nFilter: Keep only entities with medical codes\n ↓\nReport filtered results\n```\n\n## OUTPUT FORMAT\n\n**Summary:**\n- Entities extracted: [Step 1 count]\n- Entities with medical codes: [filtered count]\n\n**Entities with Medical Codes:**\n\nFor each entity:\n- **Text:** [entity text]\n- **Category:** [category]\n- **Medical Codes:**\n - ICD-10: [code] - [description] (Score: [score])\n - CPT: [code] - [description] (Score: [score])\n - RxNorm: [code] - [description] (Score: [score])\n\n## CRITICAL RULES\n\n1. ALWAYS call clinical entity extraction tool FIRST\n2. ALWAYS pass Step 1 output to entity linking tool\n3. NEVER skip either step\n4. ONLY report entities with at least one medical code\n5. DO NOT pass raw text to the linking tool\n" + }, + "temperature": { + "_input_type": "SliderInput", + "advanced": true, + "display_name": "Temperature", + "dynamic": false, + "info": "Controls randomness. Lower values are more deterministic, higher values are more creative.", + "input_types": [], + "max_label": "", + "max_label_icon": "", + "min_label": "", + "min_label_icon": "", + "name": "temperature", + "placeholder": "", + "range_spec": { + "max": 2, + "min": 0, + "step": 0.01, + "step_type": "float" + }, + "required": false, + "show": true, + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "title_case": false, + "tool_mode": false, + "type": "slider", + "value": 0.08 + }, + "tools": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Tools", + "dynamic": false, + "info": "These are the tools that the agent can use to help with tasks.", + "input_types": [ + "Tool" + ], + "list": true, + "list_add_label": "Add More", + "name": "tools", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "verbose": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Verbose", + "dynamic": false, + "info": "", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "verbose", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "Agent" + }, + "id": "Agent-pqxdi", + "measured": { + "height": 759, + "width": 320 + }, + "position": { + "x": 2141.8103842443415, + "y": 344.24995821689436 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "AutonomizeModel-RGBq4", "node": { "base_classes": [ "Data" ], "beta": false, - "category": "autonomize_models", "conditional_paths": [], "custom_fields": {}, - "description": "Extract clinical entities from text using Clinical LLM.", - "display_name": "Clinical LLM", - "documentation": "https://docs.example.com/clinical-llm", + "description": "Unified interface for Autonomize text-based AI models with dropdown selection", + "display_name": "Autonomize Model", + "documentation": "https://docs.example.com/autonomize-models", "edited": false, "field_order": [ + "selected_model", "search_query" ], "frozen": false, "icon": "Autonomize", - "key": "ClinicalLLM", + "last_updated": "2025-10-29T10:50:18.036Z", "legacy": false, - "lf_version": "1.4.3", + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], @@ -836,54 +1294,333 @@ { "allows_loop": false, "cache": true, - "display_name": "Clinical Entities", - "method": "build_output", - "name": "prediction", - "selected": "Data", + "display_name": "Toolset", + "group_outputs": false, + "hidden": null, + "method": "to_toolkit", + "name": "component_as_tool", + "options": null, + "required_inputs": null, + "selected": "Tool", "tool_mode": true, "types": [ - "Data" + "Tool" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "priority": 1, + "template": { + "_type": "Component", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "\"\"\"Autonomize Model Component - Unified text-based model component with dropdown selection.\"\"\"\n\nimport ast\nimport json\nfrom typing import Any\n\nfrom langflow.services.modelhub.model_endpoint import ModelEndpoint\nfrom loguru import logger\n\nfrom langflow.base.modelhub import ATModelComponent\nfrom langflow.inputs.inputs import FieldTypes\nfrom langflow.io import DropdownInput, MultilineInput, Output\nfrom langflow.schema.data import Data\n\n\nclass AutonomizeModelComponent(ATModelComponent):\n \"\"\"Unified component for Autonomize text-based models with dropdown selection.\"\"\"\n\n display_name: str = \"Autonomize Model\"\n description: str = \"Unified interface for Autonomize text-based AI models with dropdown selection\"\n documentation: str = \"https://docs.example.com/autonomize-models\"\n icon: str = \"Autonomize\"\n name: str = \"AutonomizeModel\"\n category: str = \"models\"\n priority: int = 1 # High priority to appear near top\n\n # Model mapping for dropdown options\n MODEL_OPTIONS = {\n \"Clinical LLM\": ModelEndpoint.CLINICAL_LLM,\n \"Clinical Note Classifier\": ModelEndpoint.CLINICAL_NOTE_CLASSIFIER,\n \"Combined Entity Linking\": ModelEndpoint.COMBINED_ENTITY_LINKING,\n \"CPT Code\": ModelEndpoint.CPT_CODE,\n \"ICD-10 Code\": ModelEndpoint.ICD_10,\n \"RxNorm Code\": ModelEndpoint.RXNORM,\n \"Short Summary\": ModelEndpoint.SHORT_SUMMARY,\n \"Detailed Summary\": ModelEndpoint.DETAILED_SUMMARY,\n \"Page Level Classifier\": ModelEndpoint.PAGE_LEVEL_CLASSIFIER\n }\n\n # Model descriptions for UI\n MODEL_DESCRIPTIONS = {\n \"Clinical LLM\": \"Extract clinical entities from medical text\",\n \"Clinical Note Classifier\": \"Classify clinical notes by type\",\n \"Combined Entity Linking\": \"Link extracted entities to standard vocabularies\",\n \"CPT Code\": \"Extract CPT codes from medical text\",\n \"ICD-10 Code\": \"Extract ICD-10 codes from medical text\",\n \"RxNorm Code\": \"Extract RxNorm codes for medications\",\n \"Short Summary\": \"short summary\",\n \"Detailed Summary\": \"detailed summary\",\n \"Page Level Classifier\": \"page type classification\"\n }\n\n inputs = [\n DropdownInput(\n name=\"selected_model\",\n display_name=\"Model\",\n options=list(MODEL_OPTIONS.keys()),\n value=next(iter(MODEL_OPTIONS.keys())),\n info=\"Select the Autonomize model to use\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"search_query\",\n display_name=\"Text Input\",\n field_type=FieldTypes.TEXT,\n multiline=True,\n tool_mode=True,\n info=\"Input text to process with the selected model\",\n ),\n ]\n\n outputs = [\n Output(\n name=\"prediction\",\n display_name=\"Model Output\",\n method=\"build_output\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._current_model_endpoint = None\n # Initialize _model_name with the default model endpoint (required by ATModelComponent)\n self._model_name = self.MODEL_OPTIONS[next(iter(self.MODEL_OPTIONS.keys()))]\n\n @property\n def model_endpoint(self) -> ModelEndpoint:\n \"\"\"Get the current model endpoint based on selection.\"\"\"\n return self.MODEL_OPTIONS[self.selected_model]\n\n @property\n def model_name_from_endpoint(self) -> str:\n \"\"\"Get the model name from the ModelEndpoint.\"\"\"\n return self.model_endpoint.get_model()\n\n async def extract_entities(self, text: Any) -> dict:\n \"\"\"Extract entities using the selected model.\"\"\"\n # Handle different input formats\n if isinstance(text, str) and text.strip().startswith(\"{\"):\n try:\n text_dict = json.loads(text)\n text = text_dict\n except json.JSONDecodeError as e:\n logger.error(f\"Failed to parse JSON string: {e}\")\n # If JSON parsing fails, use the original text\n\n # Handle the case where input is a dictionary with result structure\n if isinstance(text, dict) and \"result\" in text:\n result = text[\"result\"]\n if isinstance(result, list) and len(result) > 0:\n # Extract text from the first result item\n first_result = result[0]\n if isinstance(first_result, dict) and \"text\" in first_result:\n extracted_text = first_result[\"text\"]\n text = extracted_text\n else:\n msg = \"First result item does not contain 'text' key\"\n raise ValueError(msg)\n else:\n msg = \"Result list is empty or not a list\"\n raise ValueError(msg)\n elif isinstance(text, dict) and \"text\" in text:\n text = text[\"text\"]\n elif hasattr(text, \"text\"):\n text = text.text\n\n try:\n # Use the standard predict method from ATModelComponent\n # Set the _model_name based on current selection\n self._model_name = self.model_endpoint\n\n response = await self.predict(text=text)\n\n # Handle string responses\n if isinstance(response, str):\n try:\n response = ast.literal_eval(response)\n except (ValueError, SyntaxError):\n # If it's not a valid Python literal, try JSON\n try:\n response = json.loads(response)\n except json.JSONDecodeError:\n # If neither works, wrap in a dict\n response = {\"result\": response}\n else:\n return response\n except Exception as e:\n msg = f\"Error processing with {self.model_name}: {e!s}\"\n logger.error(f\"API call failed: {msg}\")\n raise ValueError(msg) from e\n\n async def build_output(self) -> Data:\n \"\"\"Generate the output based on selected model.\"\"\"\n query_results = await self.extract_entities(self.search_query)\n\n # Create standardized output format\n output_data = {\n \"model\": self.selected_model,\n \"model_description\": self.MODEL_DESCRIPTIONS.get(self.selected_model, \"\"),\n \"data\": query_results\n }\n\n data = Data(value=output_data)\n self.status = f\"Processed with {self.selected_model}\"\n return data\n\n def build(self):\n \"\"\"Return the main build function for Langflow framework.\"\"\"\n return self.build_output\n" + }, + "search_query": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Text Input", + "dynamic": false, + "info": "Input text to process with the selected model", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "search_query", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": true, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "selected_model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model", + "dynamic": false, + "external_options": {}, + "info": "Select the Autonomize model to use", + "name": "selected_model", + "options": [ + "Clinical LLM", + "Clinical Note Classifier", + "Combined Entity Linking", + "CPT Code", + "ICD-10 Code", + "RxNorm Code", + "Short Summary", + "Detailed Summary", + "Page Level Classifier" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Clinical LLM" + }, + "tools_metadata": { + "_input_type": "ToolsInput", + "advanced": false, + "display_name": "Actions", + "dynamic": false, + "info": "Modify tool names and descriptions to help agents understand when to use each tool.", + "is_list": true, + "list_add_label": "Add More", + "name": "tools_metadata", + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "tools", + "value": [ + { + "args": { + "search_query": { + "default": "", + "description": "Input text to process with the selected model", + "title": "Search Query", + "type": "string" + } + }, + "description": "used to extract clinical entities from the text", + "display_description": "Unified interface for Autonomize text-based AI models with dropdown selection", + "display_name": "build_output", + "name": "clinical_entity_extractor", + "readonly": false, + "status": true, + "tags": [ + "build_output" + ] + } + ] + } + }, + "tool_mode": true + }, + "showNode": true, + "type": "AutonomizeModel" + }, + "id": "AutonomizeModel-RGBq4", + "measured": { + "height": 300, + "width": 320 + }, + "position": { + "x": 1389.9314787549552, + "y": 219 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "ChatOutput-3xSmo", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "category": "input_output", + "conditional_paths": [], + "custom_fields": {}, + "description": "Display a chat message in the Playground.", + "display_name": "Chat Output", + "documentation": "https://docs.langflow.org/components-io#chat-output", + "edited": false, + "field_order": [ + "input_value", + "should_store_message", + "sender", + "sender_name", + "session_id", + "data_template" + ], + "frozen": false, + "icon": "MessagesSquare", + "key": "ChatOutput", + "legacy": false, + "lf_version": "1.6.3", + "metadata": {}, + "minimized": true, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Output Message", + "group_outputs": false, + "method": "message_response", + "name": "message", + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "score": 0.003169567463043492, + "template": { + "_type": "Component", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom langflow.base.io.chat import ChatComponent\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.schema.properties import Source\nfrom langflow.template.field.base import Output\nfrom langflow.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, icon, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" + }, + "data_template": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Data Template", + "dynamic": false, + "info": "Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "data_template", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "{text}" + }, + "input_value": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Inputs", + "dynamic": false, + "info": "Message to be passed as output.", + "input_types": [ + "Data", + "DataFrame", + "Message" ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "score": 0.18224241177399383, - "template": { - "_type": "Component", - "code": { + "list": false, + "list_add_label": "Add More", + "name": "input_value", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "sender": { + "_input_type": "DropdownInput", "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", + "combobox": false, + "dialog_inputs": {}, + "display_name": "Sender Type", + "dynamic": false, + "external_options": {}, + "info": "Type of sender.", + "name": "sender", + "options": [ + "Machine", + "User" + ], + "options_metadata": [], + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Machine" + }, + "sender_name": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Sender Name", + "dynamic": false, + "info": "Name of the sender.", + "input_types": [ + "Message" + ], "list": false, + "list_add_label": "Add More", "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, + "name": "sender_name", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, - "type": "code", - "value": "from langflow.inputs.input_mixin import FieldTypes\nfrom langflow.io import MultilineInput, Output\nfrom langflow.schema import Data\nfrom pydantic import BaseModel\nimport json\nimport logging\n\nfrom app.base.modelhub import ATModelComponent\nfrom app.services.modelhub.model_endpoint import ModelEndpoint\n\nlogger = logging.getLogger(__name__)\n\n\nclass Trait(BaseModel):\n Name: str\n Score: float\n\n\nclass Attribute(BaseModel):\n Id: int\n BeginOffset: int\n EndOffset: int\n Text: str\n Score: float\n Category: str\n Type: str\n Traits: list[Trait]\n\n\nclass Entity(BaseModel):\n Category: str\n Type: str\n Text: str\n BeginOffset: int\n EndOffset: int\n Score: float\n Traits: list[Trait]\n Id: int\n Attributes: list[Attribute] | None = None\n\n\nclass ClinicalPrediction(BaseModel):\n prediction: list[Entity]\n\n\nclass ClinicalLLMComponent(ATModelComponent):\n \"\"\"Component for the Clinical LLM model\"\"\"\n\n display_name: str = \"Clinical LLM\"\n description: str = \"Extract clinical entities from text using Clinical LLM.\"\n documentation: str = \"https://docs.example.com/clinical-llm\"\n icon: str = \"Autonomize\"\n name: str = \"ClinicalLLM\"\n _model_name = ModelEndpoint.CLINICAL_LLM\n\n inputs = [\n MultilineInput(\n name=\"search_query\",\n display_name=\"Search query\",\n field_type=FieldTypes.TEXT,\n multiline=True,\n )\n ]\n\n outputs = [\n Output(\n name=\"prediction\", display_name=\"Clinical Entities\", method=\"build_output\"\n ),\n ]\n\n async def extract_entities(self, text) -> ClinicalPrediction:\n \"\"\"Extract clinical entities from the input text\"\"\"\n # Handle the case where input is a JSON string\n if isinstance(text, str) and text.strip().startswith('{'):\n try:\n import json\n text_dict = json.loads(text)\n text = text_dict\n except json.JSONDecodeError as e:\n logger.error(f\"Failed to parse JSON string: {e}\")\n # If JSON parsing fails, use the original text\n pass\n \n # Handle the case where input is a dictionary with result structure\n if isinstance(text, dict) and \"result\" in text:\n result = text[\"result\"]\n if isinstance(result, list) and len(result) > 0:\n # Extract text from the first result item\n first_result = result[0]\n if isinstance(first_result, dict) and \"text\" in first_result:\n extracted_text = first_result[\"text\"]\n text = extracted_text\n else:\n raise ValueError(\"First result item does not contain 'text' key\")\n else:\n raise ValueError(\"Result list is empty or not a list\")\n elif isinstance(text, dict) and \"text\" in text:\n text = text[\"text\"]\n elif hasattr(text, \"text\"):\n text = text.text\n \n try:\n response = await self.predict(text=text)\n return ClinicalPrediction(**response)\n except Exception as e:\n msg = f\"Error extracting clinical entities: {e!s}\"\n logger.error(f\"API call failed: {msg}\")\n raise ValueError(msg) from e\n\n async def build_output(self) -> Data:\n \"\"\"Generate the output based on selected knowledgehub hubs.\"\"\"\n query_results = await self.extract_entities(self.search_query)\n data = Data(value={\"data\": query_results})\n self.status = data\n return data\n \n def build(self):\n \"\"\"Return the main build function for Langflow framework.\"\"\"\n return self.build_output\n" + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "AI" }, - "search_query": { - "_input_type": "MultilineInput", - "advanced": false, - "copy_field": false, - "display_name": "Search query", + "session_id": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Session ID", "dynamic": false, - "info": "", + "info": "The session ID of the chat. If empty, the current session ID parameter will be used.", "input_types": [ "Message" ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "multiline": true, - "name": "search_query", + "name": "session_id", "placeholder": "", "required": false, "show": true, @@ -893,49 +1630,70 @@ "trace_as_metadata": true, "type": "str", "value": "" + }, + "should_store_message": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Store Messages", + "dynamic": false, + "info": "Store the message in the history.", + "list": false, + "list_add_label": "Add More", + "name": "should_store_message", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true } }, "tool_mode": false }, - "showNode": true, - "type": "ClinicalLLM" + "showNode": false, + "type": "ChatOutput" }, - "dragging": false, - "id": "ClinicalLLM-I88wH", + "id": "ChatOutput-3xSmo", "measured": { - "height": 253, - "width": 320 + "height": 48, + "width": 192 }, "position": { - "x": 1926.9833371458706, - "y": 1311.2549432480278 + "x": 2503.712841397065, + "y": 580.3692648499549 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "CombinedEntityLinking-vWvxa", + "id": "BlobStorage-GQsIW", "node": { "base_classes": [ "Data" ], "beta": false, - "category": "autonomize_models", + "category": "input_output", "conditional_paths": [], "custom_fields": {}, - "description": "Model for Combined Entity Linking", - "display_name": "Combined Entity Linking", - "documentation": "https://docs.example.com/clinical-llm", + "description": "Load files from Azure Blob Storage", + "display_name": "Blob Storage", + "documentation": "http://docs.langflow.org/components/storage", "edited": false, "field_order": [ - "entities" + "storage_account", + "container_name", + "file_name", + "return_all_files" ], "frozen": false, "icon": "Autonomize", - "key": "CombinedEntityLinking", + "key": "BlobStorage", + "last_updated": "2025-10-28T20:15:03.077Z", "legacy": false, - "lf_version": "1.4.3", + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], @@ -943,9 +1701,12 @@ { "allows_loop": false, "cache": true, - "display_name": "Combined Entity Linking", - "method": "build_output", - "name": "prediction", + "display_name": "File Path", + "group_outputs": false, + "method": "get_file_paths", + "name": "file_path", + "options": null, + "required_inputs": null, "selected": "Data", "tool_mode": true, "types": [ @@ -955,7 +1716,7 @@ } ], "pinned": false, - "score": 0.006637430704019091, + "score": 0.2945640631554785, "template": { "_type": "Component", "code": { @@ -974,72 +1735,335 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import Any\nimport json\n\nfrom langflow.io import DataInput, Output\nfrom langflow.schema import Data\nfrom loguru import logger\nfrom pydantic import BaseModel\n\nfrom app.base.modelhub import ATModelComponent\nfrom app.services.modelhub.model_endpoint import ModelEndpoint\n\n\nclass EntityAttribute(BaseModel):\n BeginOffset: int\n EndOffset: int\n Id: int\n Score: float\n Text: str\n Category: str\n Type: str\n Attributes: list = []\n Traits: list = []\n\n\nclass EntityRequest(BaseModel):\n response: list[EntityAttribute]\n\n\nclass CombinedEntityLinkingComponent(ATModelComponent): \n display_name = \"Combined Entity Linking\"\n description = \"Model for Combined Entity Linking\"\n documentation = \"https://docs.example.com/clinical-llm\"\n icon = \"Autonomize\"\n name = \"CombinedEntityLinking\"\n _model_name = ModelEndpoint.COMBINED_ENTITY_LINKING\n\n inputs = [\n DataInput(\n name=\"entities\",\n display_name=\"Entities\",\n info=\"JSON array of entities to process\",\n required=True,\n )\n ]\n\n outputs = [\n Output(\n name=\"prediction\",\n display_name=\"Combined Entity Linking\",\n method=\"build_output\",\n ),\n ]\n\n async def process_entities(self, data: dict) -> Any:\n try:\n if \"value\" in data and isinstance(data[\"value\"], dict):\n data = data[\"value\"]\n\n if \"data\" in data:\n predictions = data[\"data\"].prediction\n logger.debug(f\"Predictions: {predictions}\")\n request = {\n \"response\": [\n {\n \"BeginOffset\": entity.BeginOffset,\n \"Category\": entity.Category,\n \"EndOffset\": entity.EndOffset,\n \"Id\": entity.Id,\n \"Score\": entity.Score,\n \"Text\": entity.Text,\n \"Type\": entity.Type,\n \"Attributes\": [\n attr.model_dump() for attr in (entity.Attributes or [])\n ],\n \"Traits\": [trait.model_dump() for trait in entity.Traits],\n }\n for entity in predictions\n ]\n }\n # Format the request as a JSON string wrapped in a \"text\" field\n formatted_request = {\n \"text\": json.dumps(request)\n }\n logger.debug(f\"Formatted request: {formatted_request}\")\n responses = await self.predict(**formatted_request)\n logger.debug(f\"responses: {responses}\")\n return responses\n\n # Handle case where input is not from \"data\" field\n formatted_request = {\n \"text\": json.dumps(data)\n }\n return await self.predict(**formatted_request)\n\n except Exception as e:\n logger.error(f\"Error processing entities: {e!s}\")\n raise ValueError(f\"Error processing entities: {e!s}\")\n\n async def build_output(self) -> Data:\n logger.debug(f\"CombinedEntityLinkingComponent input entities: {self.entities}\")\n if not self.entities:\n raise ValueError(\"Invalid input: no entities provided\")\n\n results = await self.process_entities(self.entities.data)\n if results[\"statusCode\"] == 200:\n return Data(value=results[\"data\"])\n\n raise ValueError(f\"Error processing entities: {results['statusCode']}\")" + "value": "\"\"\"Blob Storage Component for loading files from Azure Blob Storage.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.services.deps import get_flexstore_service\nfrom loguru import logger\n\n\nclass BlobStorageComponent(Component):\n display_name = \"Blob Storage\"\n category: str = \"input_output\"\n description = \"Load files from Azure Blob Storage\"\n documentation = \"http://docs.langflow.org/components/storage\"\n icon = \"Autonomize\"\n name = \"BlobStorage\"\n\n # Match the property name expected by FileComponent\n FILE_PATH_FIELD = \"file_path\"\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._container_list: list[str] = []\n self._file_list: list[str] = []\n\n inputs = [\n StrInput(\n name=\"storage_account\",\n display_name=\"Storage Account\",\n required=False,\n info=\"Storage Account name\",\n advanced=True,\n ),\n DropdownInput(\n name=\"container_name\",\n display_name=\"Container\",\n info=\"Select a container from the storage account\",\n required=True,\n refresh_button=True,\n ),\n DropdownInput(\n name=\"file_name\",\n display_name=\"File\",\n info=\"Select a file from the container\",\n required=True,\n refresh_button=True,\n ),\n BoolInput(\n name=\"return_all_files\",\n display_name=\"Return All Files\",\n info=\"If true and no specific file is selected, returns all files in the container\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"file_path\", # Match the property name expected by FileComponent\n display_name=\"File Path\",\n method=\"get_file_paths\",\n ),\n ]\n\n async def update_build_config(\n self, build_config: dict, field_value: Any, field_name: str | None = None\n ):\n \"\"\"Update the build configuration based on field changes.\"\"\"\n logger.info(f\"update_build_config called with field_name: {field_name}\")\n\n storage_account = getattr(self, \"storage_account\", None)\n container_name = getattr(self, \"container_name\", None)\n\n if field_name == \"container_name\":\n try:\n # Load the container options when the field is refreshed\n service = get_flexstore_service()\n self._container_list = await service.get_containers(storage_account)\n\n build_config[\"container_name\"][\"options\"] = self._container_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating container list: {e!s}\")\n raise\n\n elif field_name == \"file_name\" and container_name:\n try:\n # Load the file options when the field is refreshed\n service = get_flexstore_service()\n self._file_list = await service.get_files(\n storage_account, container_name\n )\n\n build_config[\"file_name\"][\"options\"] = self._file_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating file list: {e!s}\")\n raise\n\n return build_config\n\n async def get_file_paths(self) -> list[Data]:\n \"\"\"Get file paths for the FileComponent to process.\"\"\"\n try:\n if not self.container_name:\n logger.warning(\"Container name is required.\")\n return []\n\n service = get_flexstore_service()\n file_paths = []\n\n # If a specific file is selected\n if self.file_name:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, self.file_name\n )\n if signed_url:\n file_paths = [Data(data={self.FILE_PATH_FIELD: signed_url})]\n # If no specific file is selected and return_all_files is True\n elif self.return_all_files:\n files = await service.get_files(\n self.storage_account, self.container_name\n )\n for file in files:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, file\n )\n if signed_url:\n file_paths.append(Data(data={self.FILE_PATH_FIELD: signed_url}))\n\n if file_paths:\n self.status = file_paths\n logger.info(f\"Generated {len(file_paths)} file paths\")\n for path in file_paths:\n logger.debug(f\"File path: {path.data.get(self.FILE_PATH_FIELD)}\")\n else:\n logger.warning(\"No file paths generated\")\n\n return file_paths\n\n except Exception as e:\n logger.error(f\"Error in get_file_paths: {e!s}\")\n return []\n" + }, + "container_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Container", + "dynamic": false, + "external_options": {}, + "info": "Select a container from the storage account", + "name": "container_name", + "options": [ + "a-and-g-case-summary", + "a-and-g-case-summary-for-client", + "a-and-g-input", + "a-and-g-ocr-cache", + "a-and-g-ocr-post-processed-cache", + "a-and-g-page-images", + "a-and-g-reimagined", + "ai-studio-v2", + "aircare-dev-4002", + "aml-environment-image-build", + "ang-images", + "anywhere-200-files-input", + "anywhere-200-files-output", + "anywhere-input-docs-test", + "anywhere-output-docs-test", + "argo-flow-artifacts", + "atena", + "athena-data", + "autonomize-mlflow-artifacts", + "azure-webjobs-hosts", + "azure-webjobs-secrets", + "azureml", + "azureml-blobstore-3894b54e-0ee2-4e07-9b73-c3b30dc36b53", + "azureml-environments", + "azureml-metrics", + "backup-of-aws-instance", + "bcbs-ma-interqual-medical-policy", + "bcbs-medical-policy", + "benefit-accumulator-guidelines", + "benefit-check-eoc", + "benefit-eoc-guidelines", + "benefit-eoc-output", + "benefit-eoc-payload", + "bluecross-ca", + "carelon-guidelines", + "carelon-guidelines-v2", + "claims-docs", + "claims-qnext-response", + "cllm-v2-data", + "cms", + "correspondance-examples", + "datavant-storage-container", + "eoc-documents", + "etf-images", + "evicore-guidelines", + "fax-documents", + "fax-images", + "fax-images-2", + "fax-insights", + "fax-ocr-cache", + "fax-processor-validation-datasets", + "genesis-container", + "genesis-correspondence-automation-copilot", + "genesis-correspondence-bulk-ingestion-test", + "genesis-cph-demo-v2", + "genesis-dev-test-3012", + "genesis-dev-v2", + "genesis-platform-3010", + "genesis-platform-anywhere-prod", + "genesis-platform-cigna-dev", + "genesis-platform-demo", + "genesis-platform-demo-v2", + "genesis-platform-dev", + "genesis-platform-molina-uat", + "genesis-platform-qa", + "genesis-platform-v2-lab", + "hedis-page-images", + "hedis-page-ocr", + "indexbackup", + "insights-logs-auditevent", + "insights-metrics-pt1m", + "issue-test-1", + "job-test", + "k-hub-container", + "knowledgehubembeddings", + "load-testing-files-hedis", + "mail-images", + "mlflow", + "mlflow-dev-v2", + "mlflow-integration", + "mlflow-mssql", + "mlflow-qa", + "mlflowmssql", + "mlserver-artifacts", + "mlworkspace-backup", + "model-artifacts", + "modelcards", + "modelhub", + "modelhub-demo-v2", + "modelhub-v2-lab", + "models", + "molina-refactor-temporary", + "molina-refactor-test", + "mosaic-models", + "mosaic-provider-contracts", + "mosaic-provider-contracts-temp", + "mosaic-testing", + "ner-container", + "nestedcontainer", + "pcp-docs", + "pcp-extraction-docs", + "pcp-ocr-cache", + "pharmacy-auth", + "prior-auth", + "prior-authorization", + "projectx-files", + "projectx-temp-files", + "revisions", + "shahabas-mlflow-artifacts", + "snapshots", + "snapshotzips", + "spog-output", + "spog-qnext", + "string", + "temp-directory-genesis-studio", + "temp-studio", + "temp-studio-v2", + "temporal-poc", + "test-cms", + "test-storage-container", + "test1", + "tester-fax", + "umk2dev", + "weaviate-backups" + ], + "options_metadata": [], + "placeholder": "", + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "genesis-platform-demo" }, - "entities": { - "_input_type": "DataInput", + "file_name": { + "_input_type": "DropdownInput", "advanced": false, - "display_name": "Entities", + "combobox": false, + "dialog_inputs": {}, + "display_name": "File", "dynamic": false, - "info": "JSON array of entities to process", - "input_types": [ - "Data" + "external_options": {}, + "info": "Select a file from the container", + "name": "file_name", + "options": [ + "01_CaseId_MS_001.pdf", + "01_case1_redacted.pdf", + "02_0case.pdf", + "02_KJJ copy 6.pdf", + "02_KJJ copy 7.pdf", + "1056_Subject (1).pdf", + "1900_PriorAuthSample.pdf", + "2001_Subject (1).pdf", + "2001_Subject-1.pdf", + "2001_Subject.pdf", + "28ee05bd-5a48-49c1-ae3b-f81c7d1523bftesttest123.pdf", + "400_PATIENT (1).pdf", + "400_PATIENT.pdf", + "5af97bb72e4e4375829efa0a8f2db776.pdf", + "66f949a74ecda992ccc91ade_66f957aa4ecda992ccc91c09_1.png", + "6_SampleAuthScan.pdf", + "913 Manual Pend Codes - All States Medicaid and Medicare - Job Aid (1).pdf", + "913 Manual Pend Codes - All States Medicaid and Medicare - Job Aid.pdf", + "99736_PermE8_Studies_ALL.pdf", + "99736_PermE8_Studies_ALL123.pdf", + "A Retrospective Study of Clinical Efficacy.pdf", + "AGMT - HSA - BCA of Detroit LLC dba BCA StoneCrest Center (1).pdf", + "Alta Bates 2.pdf", + "Authorization Business Process - All States and LOBs - SOP.pdf", + "BSS-CMS-1500-Fillable-2022-1 2.png", + "BSS-CMS-1500-Fillable-2022-1.png", + "BSS-CMS-1500-Fillable-2022.pdf", + "BSS-CMS-1500-Fillable.png", + "Banner_Square_Code.png", + "CMS 1500_SAMPLE_2024.png", + "CMS 1500_Sample.pdf", + "Case8_PAC.pdf", + "Clotrimazole.pdf", + "Cody Bradshaw Resume.pdf", + "Coordination of Benefits (COB) - All States Medicaid - SOP (1).pdf", + "Coordination of Benefits (COB) - All States Medicaid - SOP (3).pdf", + "Coordination of Benefits (COB) - All States Medicaid - SOP.pdf", + "Corrected Claims and Replacement Bills - Medicare All States- Processing Guideline (1).pdf", + "Corrected Claims and Replacement Bills - Medicare All States- Processing Guideline .pdf", + "Duplicate Claim Processing - Medicare All States - Processing Guideline (1).pdf", + "Formulation PDF example (from FMS System).pdf", + "Formulation attachments PDF2.pdf", + "Gabapentin.pdf", + "PCP 26 JD.pdf", + "PCP 34 JNP.pdf", + "Patient Information HC2 - CBP.pdf", + "Prior Auth Sample 1.pdf", + "PriorAuth000.pdf", + "PriorAuth000_1.pdf", + "PriorAuthSample1_2.pdf", + "PriorAuthSample2 (1).pdf", + "PriorAuthSample2 (2).pdf", + "PriorAuthSample2.pdf", + "PriorAuthSample3 copy.pdf", + "PriorAuthSample3.pdf", + "PriorAuthSample5.pdf", + "Residential Purchase Agreement.pdf", + "Screenshot 2024-10-09 at 13.26.14.png", + "Screenshot 2024-10-10 at 13.23.26.png", + "Screenshot 2024-10-10 at 15.19.00.png", + "Screenshot 2024-10-21 at 12.49.29 PM.png", + "Screenshot 2024-10-21 at 18.05.46.png", + "Standard_Mutual_NDA__2024_.docx.pdf", + "Test1tolstoy (1).pdf", + "UB04 SAMPLE .pdf", + "UB04 SAMPLE .png", + "ZIP5lyout.pdf", + "complaince_summary.pdf", + "discharge_summary.pdf", + "form-1a.pdf", + "only-toc.pdf", + "patient_12783127.pdf", + "patient_12783135-2.pdf", + "patient_summary.pdf", + "prior-auth-doc.pdf", + "source_1729618954942.pdf", + "test2tolstoy (1).pdf" ], + "options_metadata": [], + "placeholder": "", + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "discharge_summary.pdf" + }, + "return_all_files": { + "_input_type": "BoolInput", + "advanced": false, + "display_name": "Return All Files", + "dynamic": false, + "info": "If true and no specific file is selected, returns all files in the container", "list": false, "list_add_label": "Add More", - "name": "entities", + "name": "return_all_files", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "other", + "type": "bool", + "value": true + }, + "storage_account": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Storage Account", + "dynamic": false, + "info": "Storage Account name", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "storage_account", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", "value": "" } }, "tool_mode": false }, "showNode": true, - "type": "CombinedEntityLinking" + "type": "BlobStorage" }, - "dragging": false, - "id": "CombinedEntityLinking-vWvxa", + "id": "BlobStorage-GQsIW", "measured": { - "height": 195, + "height": 329, "width": 320 }, "position": { - "x": 1484.9998839186437, - "y": 1574.777153096714 + "x": 891.359375, + "y": 220.68153235521322 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "EntityNormalizationExtraction-VnusN", + "id": "AutonomizeModel-VUyU1", "node": { "base_classes": [ "Data" ], "beta": false, - "category": "utils", "conditional_paths": [], "custom_fields": {}, - "description": "Identifies and extracts entities with medical codes.", - "display_name": "Entity Normalisation Extraction", - "documentation": "", + "description": "Unified interface for Autonomize text-based AI models with dropdown selection", + "display_name": "Autonomize Model", + "documentation": "https://docs.example.com/autonomize-models", "edited": false, "field_order": [ - "data" + "selected_model", + "search_query" ], "frozen": false, "icon": "Autonomize", - "key": "EntityNormalizationExtraction", + "last_updated": "2025-10-29T10:50:18.037Z", "legacy": false, - "lf_version": "1.4.3", + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], @@ -1047,19 +2071,23 @@ { "allows_loop": false, "cache": true, - "display_name": "Filtered Entities", - "method": "process_entities", - "name": "filtered_entities", - "selected": "Data", + "display_name": "Toolset", + "group_outputs": false, + "hidden": null, + "method": "to_toolkit", + "name": "component_as_tool", + "options": null, + "required_inputs": null, + "selected": "Tool", "tool_mode": true, "types": [ - "Data" + "Tool" ], "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.01857804455091699, + "priority": 1, "template": { "_type": "Component", "code": { @@ -1078,72 +2106,147 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.custom import Component\nfrom langflow.io import DataInput, Output\nfrom langflow.schema import Data\nfrom loguru import logger\n\n\nclass EntityNormalisationExtraction(Component):\n display_name = \"Entity Normalisation Extraction\"\n description = \"Identifies and extracts entities with medical codes.\"\n icon = \"Autonomize\"\n name = \"EntityNormalizationExtraction\"\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"Entity prediction data\",\n required=True,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Filtered Entities\",\n name=\"filtered_entities\",\n method=\"process_entities\",\n ),\n ]\n\n def process_entities(self) -> Data:\n try:\n data = self.data.data\n if isinstance(data, dict) and \"value\" in data:\n data = data[\"value\"]\n\n logger.debug(f\"Processing data: {data}\")\n output = []\n\n for entity in data:\n if (\n entity.get(\"ICD10CMConcepts\")\n or entity.get(\"CPT_Current_Procedural_Terminology\")\n or entity.get(\"RxNormConcepts\")\n ):\n output.append(entity)\n\n logger.debug(f\"Found {len(output)} entities with medical codes\")\n return Data(value={\"data\": output})\n\n except Exception as e:\n logger.error(f\"Error processing entities: {e!s}\")\n raise ValueError(f\"Error processing entities: {e!s}\")\n" + "value": "\"\"\"Autonomize Model Component - Unified text-based model component with dropdown selection.\"\"\"\n\nimport ast\nimport json\nfrom typing import Any\n\nfrom langflow.services.modelhub.model_endpoint import ModelEndpoint\nfrom loguru import logger\n\nfrom langflow.base.modelhub import ATModelComponent\nfrom langflow.inputs.inputs import FieldTypes\nfrom langflow.io import DropdownInput, MultilineInput, Output\nfrom langflow.schema.data import Data\n\n\nclass AutonomizeModelComponent(ATModelComponent):\n \"\"\"Unified component for Autonomize text-based models with dropdown selection.\"\"\"\n\n display_name: str = \"Autonomize Model\"\n description: str = \"Unified interface for Autonomize text-based AI models with dropdown selection\"\n documentation: str = \"https://docs.example.com/autonomize-models\"\n icon: str = \"Autonomize\"\n name: str = \"AutonomizeModel\"\n category: str = \"models\"\n priority: int = 1 # High priority to appear near top\n\n # Model mapping for dropdown options\n MODEL_OPTIONS = {\n \"Clinical LLM\": ModelEndpoint.CLINICAL_LLM,\n \"Clinical Note Classifier\": ModelEndpoint.CLINICAL_NOTE_CLASSIFIER,\n \"Combined Entity Linking\": ModelEndpoint.COMBINED_ENTITY_LINKING,\n \"CPT Code\": ModelEndpoint.CPT_CODE,\n \"ICD-10 Code\": ModelEndpoint.ICD_10,\n \"RxNorm Code\": ModelEndpoint.RXNORM,\n \"Short Summary\": ModelEndpoint.SHORT_SUMMARY,\n \"Detailed Summary\": ModelEndpoint.DETAILED_SUMMARY,\n \"Page Level Classifier\": ModelEndpoint.PAGE_LEVEL_CLASSIFIER\n }\n\n # Model descriptions for UI\n MODEL_DESCRIPTIONS = {\n \"Clinical LLM\": \"Extract clinical entities from medical text\",\n \"Clinical Note Classifier\": \"Classify clinical notes by type\",\n \"Combined Entity Linking\": \"Link extracted entities to standard vocabularies\",\n \"CPT Code\": \"Extract CPT codes from medical text\",\n \"ICD-10 Code\": \"Extract ICD-10 codes from medical text\",\n \"RxNorm Code\": \"Extract RxNorm codes for medications\",\n \"Short Summary\": \"short summary\",\n \"Detailed Summary\": \"detailed summary\",\n \"Page Level Classifier\": \"page type classification\"\n }\n\n inputs = [\n DropdownInput(\n name=\"selected_model\",\n display_name=\"Model\",\n options=list(MODEL_OPTIONS.keys()),\n value=next(iter(MODEL_OPTIONS.keys())),\n info=\"Select the Autonomize model to use\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"search_query\",\n display_name=\"Text Input\",\n field_type=FieldTypes.TEXT,\n multiline=True,\n tool_mode=True,\n info=\"Input text to process with the selected model\",\n ),\n ]\n\n outputs = [\n Output(\n name=\"prediction\",\n display_name=\"Model Output\",\n method=\"build_output\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._current_model_endpoint = None\n # Initialize _model_name with the default model endpoint (required by ATModelComponent)\n self._model_name = self.MODEL_OPTIONS[next(iter(self.MODEL_OPTIONS.keys()))]\n\n @property\n def model_endpoint(self) -> ModelEndpoint:\n \"\"\"Get the current model endpoint based on selection.\"\"\"\n return self.MODEL_OPTIONS[self.selected_model]\n\n @property\n def model_name_from_endpoint(self) -> str:\n \"\"\"Get the model name from the ModelEndpoint.\"\"\"\n return self.model_endpoint.get_model()\n\n async def extract_entities(self, text: Any) -> dict:\n \"\"\"Extract entities using the selected model.\"\"\"\n # Handle different input formats\n if isinstance(text, str) and text.strip().startswith(\"{\"):\n try:\n text_dict = json.loads(text)\n text = text_dict\n except json.JSONDecodeError as e:\n logger.error(f\"Failed to parse JSON string: {e}\")\n # If JSON parsing fails, use the original text\n\n # Handle the case where input is a dictionary with result structure\n if isinstance(text, dict) and \"result\" in text:\n result = text[\"result\"]\n if isinstance(result, list) and len(result) > 0:\n # Extract text from the first result item\n first_result = result[0]\n if isinstance(first_result, dict) and \"text\" in first_result:\n extracted_text = first_result[\"text\"]\n text = extracted_text\n else:\n msg = \"First result item does not contain 'text' key\"\n raise ValueError(msg)\n else:\n msg = \"Result list is empty or not a list\"\n raise ValueError(msg)\n elif isinstance(text, dict) and \"text\" in text:\n text = text[\"text\"]\n elif hasattr(text, \"text\"):\n text = text.text\n\n try:\n # Use the standard predict method from ATModelComponent\n # Set the _model_name based on current selection\n self._model_name = self.model_endpoint\n\n response = await self.predict(text=text)\n\n # Handle string responses\n if isinstance(response, str):\n try:\n response = ast.literal_eval(response)\n except (ValueError, SyntaxError):\n # If it's not a valid Python literal, try JSON\n try:\n response = json.loads(response)\n except json.JSONDecodeError:\n # If neither works, wrap in a dict\n response = {\"result\": response}\n else:\n return response\n except Exception as e:\n msg = f\"Error processing with {self.model_name}: {e!s}\"\n logger.error(f\"API call failed: {msg}\")\n raise ValueError(msg) from e\n\n async def build_output(self) -> Data:\n \"\"\"Generate the output based on selected model.\"\"\"\n query_results = await self.extract_entities(self.search_query)\n\n # Create standardized output format\n output_data = {\n \"model\": self.selected_model,\n \"model_description\": self.MODEL_DESCRIPTIONS.get(self.selected_model, \"\"),\n \"data\": query_results\n }\n\n data = Data(value=output_data)\n self.status = f\"Processed with {self.selected_model}\"\n return data\n\n def build(self):\n \"\"\"Return the main build function for Langflow framework.\"\"\"\n return self.build_output\n" }, - "data": { - "_input_type": "DataInput", + "search_query": { + "_input_type": "MultilineInput", "advanced": false, - "display_name": "Data", + "copy_field": false, + "display_name": "Text Input", "dynamic": false, - "info": "Entity prediction data", + "info": "Input text to process with the selected model", "input_types": [ - "Data" + "Message" ], "list": false, "list_add_label": "Add More", - "name": "data", + "load_from_db": false, + "multiline": true, + "name": "search_query", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, - "tool_mode": false, + "tool_mode": true, "trace_as_input": true, "trace_as_metadata": true, - "type": "other", + "type": "str", "value": "" + }, + "selected_model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model", + "dynamic": false, + "external_options": {}, + "info": "Select the Autonomize model to use", + "name": "selected_model", + "options": [ + "Clinical LLM", + "Clinical Note Classifier", + "Combined Entity Linking", + "CPT Code", + "ICD-10 Code", + "RxNorm Code", + "Short Summary", + "Detailed Summary", + "Page Level Classifier" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Combined Entity Linking" + }, + "tools_metadata": { + "_input_type": "ToolsInput", + "advanced": false, + "display_name": "Actions", + "dynamic": false, + "info": "Modify tool names and descriptions to help agents understand when to use each tool.", + "is_list": true, + "list_add_label": "Add More", + "name": "tools_metadata", + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "tools", + "value": [ + { + "args": { + "search_query": { + "default": "", + "description": "Input text to process with the selected model", + "title": "Search Query", + "type": "string" + } + }, + "description": "Model for Combined Entity Linking", + "display_description": "Unified interface for Autonomize text-based AI models with dropdown selection", + "display_name": "build_output", + "name": "combined_entity_linking", + "readonly": false, + "status": true, + "tags": [ + "build_output" + ] + } + ] } }, - "tool_mode": false + "tool_mode": true }, "showNode": true, - "type": "EntityNormalizationExtraction" + "type": "AutonomizeModel" }, "dragging": false, - "id": "EntityNormalizationExtraction-VnusN", + "id": "AutonomizeModel-VUyU1", "measured": { - "height": 215, + "height": 300, "width": 320 }, "position": { - "x": 2027.055062404865, - "y": 1834.9636387701005 + "x": 1733.359375, + "y": 121 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "JSONOutput-5QXSP", + "id": "AutonomizeModel-o8VDQ", "node": { "base_classes": [ - "Message" + "Data" ], "beta": false, - "category": "outputs", "conditional_paths": [], "custom_fields": {}, - "description": "Display input data as JSON in the Playground.", - "display_name": "JSON Output", - "documentation": "", + "description": "Unified interface for Autonomize text-based AI models with dropdown selection", + "display_name": "Autonomize Model", + "documentation": "https://docs.example.com/autonomize-models", "edited": false, "field_order": [ - "data", - "pretty_print" + "selected_model", + "search_query" ], "frozen": false, - "icon": "Braces", - "key": "JSONOutput", + "icon": "Autonomize", + "last_updated": "2025-10-29T09:56:32.763Z", "legacy": false, + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], @@ -1151,20 +2254,23 @@ { "allows_loop": false, "cache": true, - "display_name": "JSON", + "display_name": "Model Output", "group_outputs": false, - "method": "json_response", - "name": "json", - "selected": "Message", + "hidden": null, + "method": "build_output", + "name": "prediction", + "options": null, + "required_inputs": null, + "selected": "Data", "tool_mode": true, "types": [ - "Message" + "Data" ], "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.007568328950209746, + "priority": 1, "template": { "_type": "Component", "code": { @@ -1183,79 +2289,98 @@ "show": true, "title_case": false, "type": "code", - "value": "import json\n\nfrom langflow.base.io.text import TextComponent\nfrom langflow.inputs import DataInput\nfrom langflow.io import BoolInput, Output\nfrom langflow.schema import Data\nfrom langflow.schema.message import Message\n\n\nclass JSONOutputComponent(TextComponent):\n display_name = \"JSON Output\"\n description = \"Display input data as JSON in the Playground.\"\n icon = \"Braces\"\n name = \"JSONOutput\"\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to JSON.\",\n is_list=True,\n ),\n BoolInput(\n name=\"pretty_print\",\n display_name=\"Pretty Print\",\n info=\"Format JSON with proper indentation\",\n value=True,\n advanced=True,\n ),\n ]\n outputs = [\n Output(display_name=\"JSON\", name=\"json\", method=\"json_response\"),\n ]\n\n def _process_data(self, data: Data | list[Data]) -> dict | list:\n \"\"\"Convert Data object(s) to dictionary/list format.\"\"\"\n if isinstance(data, list):\n return [item.dict() for item in data]\n return data.dict()\n\n def json_response(self) -> Message:\n try:\n # Process the Data input\n processed_data = self._process_data(self.data)\n\n # Convert to JSON string with optional pretty printing\n if self.pretty_print:\n formatted_json = json.dumps(\n processed_data, indent=2, ensure_ascii=False\n )\n else:\n formatted_json = json.dumps(processed_data, ensure_ascii=False)\n\n message = Message(text=formatted_json)\n self.status = formatted_json\n return message\n\n except Exception as e:\n error_message = f\"Error processing data to JSON: {e!s}\"\n message = Message(text=error_message)\n self.status = error_message\n return message\n" + "value": "\"\"\"Autonomize Model Component - Unified text-based model component with dropdown selection.\"\"\"\n\nimport ast\nimport json\nfrom typing import Any\n\nfrom langflow.services.modelhub.model_endpoint import ModelEndpoint\nfrom loguru import logger\n\nfrom langflow.base.modelhub import ATModelComponent\nfrom langflow.inputs.inputs import FieldTypes\nfrom langflow.io import DropdownInput, MultilineInput, Output\nfrom langflow.schema.data import Data\n\n\nclass AutonomizeModelComponent(ATModelComponent):\n \"\"\"Unified component for Autonomize text-based models with dropdown selection.\"\"\"\n\n display_name: str = \"Autonomize Model\"\n description: str = \"Unified interface for Autonomize text-based AI models with dropdown selection\"\n documentation: str = \"https://docs.example.com/autonomize-models\"\n icon: str = \"Autonomize\"\n name: str = \"AutonomizeModel\"\n category: str = \"models\"\n priority: int = 1 # High priority to appear near top\n\n # Model mapping for dropdown options\n MODEL_OPTIONS = {\n \"Clinical LLM\": ModelEndpoint.CLINICAL_LLM,\n \"Clinical Note Classifier\": ModelEndpoint.CLINICAL_NOTE_CLASSIFIER,\n \"Combined Entity Linking\": ModelEndpoint.COMBINED_ENTITY_LINKING,\n \"CPT Code\": ModelEndpoint.CPT_CODE,\n \"ICD-10 Code\": ModelEndpoint.ICD_10,\n \"RxNorm Code\": ModelEndpoint.RXNORM,\n \"Short Summary\": ModelEndpoint.SHORT_SUMMARY,\n \"Detailed Summary\": ModelEndpoint.DETAILED_SUMMARY,\n \"Page Level Classifier\": ModelEndpoint.PAGE_LEVEL_CLASSIFIER\n }\n\n # Model descriptions for UI\n MODEL_DESCRIPTIONS = {\n \"Clinical LLM\": \"Extract clinical entities from medical text\",\n \"Clinical Note Classifier\": \"Classify clinical notes by type\",\n \"Combined Entity Linking\": \"Link extracted entities to standard vocabularies\",\n \"CPT Code\": \"Extract CPT codes from medical text\",\n \"ICD-10 Code\": \"Extract ICD-10 codes from medical text\",\n \"RxNorm Code\": \"Extract RxNorm codes for medications\",\n \"Short Summary\": \"short summary\",\n \"Detailed Summary\": \"detailed summary\",\n \"Page Level Classifier\": \"page type classification\"\n }\n\n inputs = [\n DropdownInput(\n name=\"selected_model\",\n display_name=\"Model\",\n options=list(MODEL_OPTIONS.keys()),\n value=next(iter(MODEL_OPTIONS.keys())),\n info=\"Select the Autonomize model to use\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"search_query\",\n display_name=\"Text Input\",\n field_type=FieldTypes.TEXT,\n multiline=True,\n tool_mode=True,\n info=\"Input text to process with the selected model\",\n ),\n ]\n\n outputs = [\n Output(\n name=\"prediction\",\n display_name=\"Model Output\",\n method=\"build_output\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._current_model_endpoint = None\n # Initialize _model_name with the default model endpoint (required by ATModelComponent)\n self._model_name = self.MODEL_OPTIONS[next(iter(self.MODEL_OPTIONS.keys()))]\n\n @property\n def model_endpoint(self) -> ModelEndpoint:\n \"\"\"Get the current model endpoint based on selection.\"\"\"\n return self.MODEL_OPTIONS[self.selected_model]\n\n @property\n def model_name_from_endpoint(self) -> str:\n \"\"\"Get the model name from the ModelEndpoint.\"\"\"\n return self.model_endpoint.get_model()\n\n async def extract_entities(self, text: Any) -> dict:\n \"\"\"Extract entities using the selected model.\"\"\"\n # Handle different input formats\n if isinstance(text, str) and text.strip().startswith(\"{\"):\n try:\n text_dict = json.loads(text)\n text = text_dict\n except json.JSONDecodeError as e:\n logger.error(f\"Failed to parse JSON string: {e}\")\n # If JSON parsing fails, use the original text\n\n # Handle the case where input is a dictionary with result structure\n if isinstance(text, dict) and \"result\" in text:\n result = text[\"result\"]\n if isinstance(result, list) and len(result) > 0:\n # Extract text from the first result item\n first_result = result[0]\n if isinstance(first_result, dict) and \"text\" in first_result:\n extracted_text = first_result[\"text\"]\n text = extracted_text\n else:\n msg = \"First result item does not contain 'text' key\"\n raise ValueError(msg)\n else:\n msg = \"Result list is empty or not a list\"\n raise ValueError(msg)\n elif isinstance(text, dict) and \"text\" in text:\n text = text[\"text\"]\n elif hasattr(text, \"text\"):\n text = text.text\n\n try:\n # Use the standard predict method from ATModelComponent\n # Set the _model_name based on current selection\n self._model_name = self.model_endpoint\n\n response = await self.predict(text=text)\n\n # Handle string responses\n if isinstance(response, str):\n try:\n response = ast.literal_eval(response)\n except (ValueError, SyntaxError):\n # If it's not a valid Python literal, try JSON\n try:\n response = json.loads(response)\n except json.JSONDecodeError:\n # If neither works, wrap in a dict\n response = {\"result\": response}\n else:\n return response\n except Exception as e:\n msg = f\"Error processing with {self.model_name}: {e!s}\"\n logger.error(f\"API call failed: {msg}\")\n raise ValueError(msg) from e\n\n async def build_output(self) -> Data:\n \"\"\"Generate the output based on selected model.\"\"\"\n query_results = await self.extract_entities(self.search_query)\n\n # Create standardized output format\n output_data = {\n \"model\": self.selected_model,\n \"model_description\": self.MODEL_DESCRIPTIONS.get(self.selected_model, \"\"),\n \"data\": query_results\n }\n\n data = Data(value=output_data)\n self.status = f\"Processed with {self.selected_model}\"\n return data\n\n def build(self):\n \"\"\"Return the main build function for Langflow framework.\"\"\"\n return self.build_output\n" }, - "data": { - "_input_type": "DataInput", + "search_query": { + "_input_type": "MultilineInput", "advanced": false, - "display_name": "Data", + "copy_field": false, + "display_name": "Text Input", "dynamic": false, - "info": "The data to convert to JSON.", + "info": "Input text to process with the selected model", "input_types": [ - "Data" + "Message" ], - "list": true, + "list": false, "list_add_label": "Add More", - "name": "data", + "load_from_db": false, + "multiline": true, + "name": "search_query", "placeholder": "", "required": false, "show": true, "title_case": false, - "tool_mode": false, + "tool_mode": true, "trace_as_input": true, "trace_as_metadata": true, - "type": "other", + "type": "str", "value": "" }, - "pretty_print": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Pretty Print", + "selected_model": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model", "dynamic": false, - "info": "Format JSON with proper indentation", - "list": false, - "list_add_label": "Add More", - "name": "pretty_print", + "external_options": {}, + "info": "Select the Autonomize model to use", + "name": "selected_model", + "options": [ + "Clinical LLM", + "Clinical Note Classifier", + "Combined Entity Linking", + "CPT Code", + "ICD-10 Code", + "RxNorm Code", + "Short Summary", + "Detailed Summary", + "Page Level Classifier" + ], + "options_metadata": [], "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "str", + "value": "Combined Entity Linking" } }, "tool_mode": false }, "showNode": true, - "type": "JSONOutput" + "type": "AutonomizeModel" }, "dragging": false, - "id": "JSONOutput-5QXSP", + "id": "AutonomizeModel-o8VDQ", "measured": { - "height": 195, + "height": 302, "width": 320 }, "position": { - "x": 2298.9165826918006, - "y": 2218.57191892958 + "x": 2013.4112208479319, + "y": 1114.726293509548 }, - "selected": true, + "selected": false, "type": "genericNode" } ], "viewport": { - "x": -594.978748114964, - "y": -255.80650152626083, - "zoom": 0.5995699568955621 + "x": -363.58997736331094, + "y": 38.51132958746524, + "zoom": 0.4539021955799936 } }, - "description": "Harness the Power of Conversational AI.", + "description": "Entity Normalization agent.", "endpoint_name": null, + "id": "8e4953a0-83ff-4d3d-8b3f-354c5345e25e", "is_component": false, - "last_tested_version": "1.4.3", - "name": "Entity Normalization", + "last_tested_version": "1.6.3", + "name": "Entity Normalization agent", "tags": [ "chart-review" ] diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Lab Value Extraction.json b/src/backend/base/langflow/initial_setup/starter_projects/Lab Value Extraction.json index f644657f2db4..c149e080a471 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Lab Value Extraction.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Lab Value Extraction.json @@ -6,104 +6,102 @@ "className": "", "data": { "sourceHandle": { - "dataType": "BlobStorage", - "id": "BlobStorage-dbs9g", - "name": "file_path", + "dataType": "ParserComponent", + "id": "ParserComponent-0LmpJ", + "name": "parsed_text", "output_types": [ - "Data" + "Message" ] }, "targetHandle": { - "fieldName": "url", - "id": "AzureDocumentIntelligence-Zgj5O", + "fieldName": "input_value", + "id": "Agent-8SCLe", "inputTypes": [ - "str", - "Data", - "Message", - "list" + "Message" ], - "type": "other" + "type": "str" } }, - "id": "xy-edge__BlobStorage-dbs9g{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-dbs9gœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}-AzureDocumentIntelligence-Zgj5O{œfieldNameœ:œurlœ,œidœ:œAzureDocumentIntelligence-Zgj5Oœ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-ParserComponent-0LmpJ{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-0LmpJœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Agent-8SCLe{œfieldNameœ:œinput_valueœ,œidœ:œAgent-8SCLeœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "BlobStorage-dbs9g", - "sourceHandle": "{œdataTypeœ: œBlobStorageœ, œidœ: œBlobStorage-dbs9gœ, œnameœ: œfile_pathœ, œoutput_typesœ: [œDataœ]}", - "target": "AzureDocumentIntelligence-Zgj5O", - "targetHandle": "{œfieldNameœ: œurlœ, œidœ: œAzureDocumentIntelligence-Zgj5Oœ, œinputTypesœ: [œstrœ, œDataœ, œMessageœ, œlistœ], œtypeœ: œotherœ}" + "source": "ParserComponent-0LmpJ", + "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-0LmpJœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", + "target": "Agent-8SCLe", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œAgent-8SCLeœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "AzureDocumentIntelligence", - "id": "AzureDocumentIntelligence-Zgj5O", - "name": "structured_data", + "dataType": "AutonomizeModel", + "id": "AutonomizeModel-KQNLA", + "name": "component_as_tool", "output_types": [ - "DataFrame" + "Tool" ] }, "targetHandle": { - "fieldName": "input_data", - "id": "ParserComponent-C3bHX", + "fieldName": "tools", + "id": "Agent-8SCLe", "inputTypes": [ - "DataFrame", - "Data" + "Tool" ], "type": "other" } }, - "id": "xy-edge__AzureDocumentIntelligence-Zgj5O{œdataTypeœ:œAzureDocumentIntelligenceœ,œidœ:œAzureDocumentIntelligence-Zgj5Oœ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataFrameœ]}-ParserComponent-C3bHX{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-C3bHXœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-AutonomizeModel-KQNLA{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-KQNLAœ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}-Agent-8SCLe{œfieldNameœ:œtoolsœ,œidœ:œAgent-8SCLeœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}", "selected": false, - "source": "AzureDocumentIntelligence-Zgj5O", - "sourceHandle": "{œdataTypeœ: œAzureDocumentIntelligenceœ, œidœ: œAzureDocumentIntelligence-Zgj5Oœ, œnameœ: œstructured_dataœ, œoutput_typesœ: [œDataFrameœ]}", - "target": "ParserComponent-C3bHX", - "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-C3bHXœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" + "source": "AutonomizeModel-KQNLA", + "sourceHandle": "{œdataTypeœ: œAutonomizeModelœ, œidœ: œAutonomizeModel-KQNLAœ, œnameœ: œcomponent_as_toolœ, œoutput_typesœ: [œToolœ]}", + "target": "Agent-8SCLe", + "targetHandle": "{œfieldNameœ: œtoolsœ, œidœ: œAgent-8SCLeœ, œinputTypesœ: [œToolœ], œtypeœ: œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "AutonomizeModel", - "id": "AutonomizeModel-UO0Et", - "name": "prediction", + "dataType": "Agent", + "id": "Agent-8SCLe", + "name": "response", "output_types": [ - "Data" + "Message" ] }, "targetHandle": { - "fieldName": "data", - "id": "LabValuesExtraction-GJk2i", + "fieldName": "input_value", + "id": "ChatOutput-ql59o", "inputTypes": [ - "Data" + "Data", + "DataFrame", + "Message" ], "type": "other" } }, - "id": "xy-edge__AutonomizeModel-UO0Et{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-UO0Etœ,œnameœ:œpredictionœ,œoutput_typesœ:[œDataœ]}-LabValuesExtraction-GJk2i{œfieldNameœ:œdataœ,œidœ:œLabValuesExtraction-GJk2iœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-Agent-8SCLe{œdataTypeœ:œAgentœ,œidœ:œAgent-8SCLeœ,œnameœ:œresponseœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-ql59o{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-ql59oœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", "selected": false, - "source": "AutonomizeModel-UO0Et", - "sourceHandle": "{œdataTypeœ: œAutonomizeModelœ, œidœ: œAutonomizeModel-UO0Etœ, œnameœ: œpredictionœ, œoutput_typesœ: [œDataœ]}", - "target": "LabValuesExtraction-GJk2i", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œLabValuesExtraction-GJk2iœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "source": "Agent-8SCLe", + "sourceHandle": "{œdataTypeœ: œAgentœ, œidœ: œAgent-8SCLeœ, œnameœ: œresponseœ, œoutput_typesœ: [œMessageœ]}", + "target": "ChatOutput-ql59o", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-ql59oœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "LabValuesExtraction", - "id": "LabValuesExtraction-GJk2i", - "name": "data_list", + "dataType": "AzureDocumentIntelligence", + "id": "AzureDocumentIntelligence-ZfhxX", + "name": "structured_data", "output_types": [ - "Data" + "DataFrame" ] }, "targetHandle": { "fieldName": "input_data", - "id": "ParserComponent-ep9UC", + "id": "ParserComponent-0LmpJ", "inputTypes": [ "DataFrame", "Data" @@ -111,96 +109,77 @@ "type": "other" } }, - "id": "xy-edge__LabValuesExtraction-GJk2i{œdataTypeœ:œLabValuesExtractionœ,œidœ:œLabValuesExtraction-GJk2iœ,œnameœ:œdata_listœ,œoutput_typesœ:[œDataœ]}-ParserComponent-ep9UC{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-ep9UCœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", - "selected": false, - "source": "LabValuesExtraction-GJk2i", - "sourceHandle": "{œdataTypeœ: œLabValuesExtractionœ, œidœ: œLabValuesExtraction-GJk2iœ, œnameœ: œdata_listœ, œoutput_typesœ: [œDataœ]}", - "target": "ParserComponent-ep9UC", - "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-ep9UCœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "ParserComponent", - "id": "ParserComponent-ep9UC", - "name": "parsed_text", - "output_types": [ - "Message" - ] - }, - "targetHandle": { - "fieldName": "input_value", - "id": "TextOutput-4q9JH", - "inputTypes": [ - "Message" - ], - "type": "str" - } - }, - "id": "xy-edge__ParserComponent-ep9UC{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-ep9UCœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-TextOutput-4q9JH{œfieldNameœ:œinput_valueœ,œidœ:œTextOutput-4q9JHœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-AzureDocumentIntelligence-ZfhxX{œdataTypeœ:œAzureDocumentIntelligenceœ,œidœ:œAzureDocumentIntelligence-ZfhxXœ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataFrameœ]}-ParserComponent-0LmpJ{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-0LmpJœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "selected": false, - "source": "ParserComponent-ep9UC", - "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-ep9UCœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", - "target": "TextOutput-4q9JH", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œTextOutput-4q9JHœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "AzureDocumentIntelligence-ZfhxX", + "sourceHandle": "{œdataTypeœ: œAzureDocumentIntelligenceœ, œidœ: œAzureDocumentIntelligence-ZfhxXœ, œnameœ: œstructured_dataœ, œoutput_typesœ: [œDataFrameœ]}", + "target": "ParserComponent-0LmpJ", + "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-0LmpJœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "ParserComponent", - "id": "ParserComponent-C3bHX", - "name": "parsed_text", + "dataType": "BlobStorage", + "id": "BlobStorage-ElgWO", + "name": "file_path", "output_types": [ - "Message" + "Data" ] }, "targetHandle": { - "fieldName": "search_query", - "id": "AutonomizeModel-UO0Et", + "fieldName": "url", + "id": "AzureDocumentIntelligence-ZfhxX", "inputTypes": [ - "Message" + "str", + "Data", + "Message", + "list" ], - "type": "str" + "type": "other" } }, - "id": "xy-edge__ParserComponent-C3bHX{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-C3bHXœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-AutonomizeModel-UO0Et{œfieldNameœ:œsearch_queryœ,œidœ:œAutonomizeModel-UO0Etœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-BlobStorage-ElgWO{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-ElgWOœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}-AzureDocumentIntelligence-ZfhxX{œfieldNameœ:œurlœ,œidœ:œAzureDocumentIntelligence-ZfhxXœ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}", "selected": false, - "source": "ParserComponent-C3bHX", - "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-C3bHXœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", - "target": "AutonomizeModel-UO0Et", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAutonomizeModel-UO0Etœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "BlobStorage-ElgWO", + "sourceHandle": "{œdataTypeœ: œBlobStorageœ, œidœ: œBlobStorage-ElgWOœ, œnameœ: œfile_pathœ, œoutput_typesœ: [œDataœ]}", + "target": "AzureDocumentIntelligence-ZfhxX", + "targetHandle": "{œfieldNameœ: œurlœ, œidœ: œAzureDocumentIntelligence-ZfhxXœ, œinputTypesœ: [œstrœ, œDataœ, œMessageœ, œlistœ], œtypeœ: œotherœ}" } ], "nodes": [ { "data": { - "id": "BlobStorage-dbs9g", + "id": "AzureDocumentIntelligence-ZfhxX", "node": { "base_classes": [ - "Data" + "DataFrame" ], "beta": false, - "category": "inputs", + "category": "models", "conditional_paths": [], "custom_fields": {}, - "description": "Load files from Azure Blob Storage", - "display_name": "Blob Storage", - "documentation": "http://docs.langflow.org/components/storage", + "description": "Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis", + "display_name": "Azure Document Intelligence", + "documentation": "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/", "edited": false, "field_order": [ - "storage_account", - "container_name", - "file_name", - "return_all_files" + "url", + "file_path", + "silent_errors", + "delete_server_file_after_processing", + "ignore_unsupported_extensions", + "ignore_unspecified_files", + "model_type", + "extract_tables", + "include_confidence", + "use_multithreading", + "concurrency_multithreading" ], "frozen": false, - "icon": "Autonomize", - "key": "BlobStorage", - "last_updated": "2025-10-15T10:37:27.252Z", + "icon": "Azure", + "key": "AzureDocumentIntelligence", "legacy": false, "lf_version": "1.6.3", "metadata": {}, @@ -210,20 +189,21 @@ { "allows_loop": false, "cache": true, - "display_name": "File Path", + "display_name": "Structured Data", "group_outputs": false, - "method": "get_file_paths", - "name": "file_path", - "selected": "Data", + "method": "load_files", + "name": "structured_data", + "selected": "DataFrame", "tool_mode": true, "types": [ - "Data" + "DataFrame" ], "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.007568328950209746, + "priority": 3, + "score": 0.10489765225226892, "template": { "_type": "Component", "code": { @@ -242,61 +222,53 @@ "show": true, "title_case": false, "type": "code", - "value": "\"\"\"Blob Storage Component for loading files from Azure Blob Storage.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.services.deps import get_flexstore_service\nfrom loguru import logger\n\n\nclass BlobStorageComponent(Component):\n display_name = \"Blob Storage\"\n category: str = \"input_output\"\n description = \"Load files from Azure Blob Storage\"\n documentation = \"http://docs.langflow.org/components/storage\"\n icon = \"Autonomize\"\n name = \"BlobStorage\"\n\n # Match the property name expected by FileComponent\n FILE_PATH_FIELD = \"file_path\"\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._container_list: list[str] = []\n self._file_list: list[str] = []\n\n inputs = [\n StrInput(\n name=\"storage_account\",\n display_name=\"Storage Account\",\n required=False,\n info=\"Storage Account name\",\n advanced=True,\n ),\n DropdownInput(\n name=\"container_name\",\n display_name=\"Container\",\n info=\"Select a container from the storage account\",\n required=True,\n refresh_button=True,\n ),\n DropdownInput(\n name=\"file_name\",\n display_name=\"File\",\n info=\"Select a file from the container\",\n required=True,\n refresh_button=True,\n ),\n BoolInput(\n name=\"return_all_files\",\n display_name=\"Return All Files\",\n info=\"If true and no specific file is selected, returns all files in the container\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"file_path\", # Match the property name expected by FileComponent\n display_name=\"File Path\",\n method=\"get_file_paths\",\n ),\n ]\n\n async def update_build_config(\n self, build_config: dict, field_value: Any, field_name: str | None = None\n ):\n \"\"\"Update the build configuration based on field changes.\"\"\"\n logger.info(f\"update_build_config called with field_name: {field_name}\")\n\n storage_account = getattr(self, \"storage_account\", None)\n container_name = getattr(self, \"container_name\", None)\n\n if field_name == \"container_name\":\n try:\n # Load the container options when the field is refreshed\n service = get_flexstore_service()\n self._container_list = await service.get_containers(storage_account)\n\n build_config[\"container_name\"][\"options\"] = self._container_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating container list: {e!s}\")\n raise\n\n elif field_name == \"file_name\" and container_name:\n try:\n # Load the file options when the field is refreshed\n service = get_flexstore_service()\n self._file_list = await service.get_files(\n storage_account, container_name\n )\n\n build_config[\"file_name\"][\"options\"] = self._file_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating file list: {e!s}\")\n raise\n\n return build_config\n\n async def get_file_paths(self) -> list[Data]:\n \"\"\"Get file paths for the FileComponent to process.\"\"\"\n try:\n if not self.container_name:\n logger.warning(\"Container name is required.\")\n return []\n\n service = get_flexstore_service()\n file_paths = []\n\n # If a specific file is selected\n if self.file_name:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, self.file_name\n )\n if signed_url:\n file_paths = [Data(data={self.FILE_PATH_FIELD: signed_url})]\n # If no specific file is selected and return_all_files is True\n elif self.return_all_files:\n files = await service.get_files(\n self.storage_account, self.container_name\n )\n for file in files:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, file\n )\n if signed_url:\n file_paths.append(Data(data={self.FILE_PATH_FIELD: signed_url}))\n\n if file_paths:\n self.status = file_paths\n logger.info(f\"Generated {len(file_paths)} file paths\")\n for path in file_paths:\n logger.debug(f\"File path: {path.data.get(self.FILE_PATH_FIELD)}\")\n else:\n logger.warning(\"No file paths generated\")\n\n return file_paths\n\n except Exception as e:\n logger.error(f\"Error in get_file_paths: {e!s}\")\n return []\n" + "value": "\"\"\"Azure Document Intelligence Component - Form recognition and document processing.\"\"\"\n\nimport asyncio\nimport concurrent.futures\nimport mimetypes\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom urllib.parse import unquote, urlparse\n\nimport aiohttp\nimport requests\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output\nfrom langflow.schema.data import Data\nfrom loguru import logger\n\n\nclass AzureDocumentIntelligenceComponent(BaseFileComponent):\n \"\"\"Component for Azure Document Intelligence - advanced document processing and form recognition.\"\"\"\n\n display_name: str = \"Azure Document Intelligence\"\n description: str = \"Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis\"\n documentation: str = \"https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/\"\n icon: str = \"Azure\"\n name: str = \"AzureDocumentIntelligence\"\n category: str = \"models\"\n priority: int = 3 # High priority for document processing\n\n VALID_EXTENSIONS = [\"pdf\", \"jpg\", \"jpeg\", \"png\", \"bmp\", \"tiff\", \"tif\"]\n\n inputs = [\n HandleInput(\n name=\"url\",\n display_name=\"URL\",\n info=\"URL to the document to process\",\n input_types=[\"str\", \"Data\", \"Message\", \"list\"],\n required=False,\n ),\n # Include only the HandleInput and BoolInputs from base_inputs\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n DropdownInput(\n name=\"model_type\",\n display_name=\"Model Type\",\n options=[\"prebuilt-document\", \"prebuilt-read\", \"prebuilt-layout\"],\n value=\"prebuilt-document\",\n info=\"Choose the Form Recognizer model to use\",\n ),\n BoolInput(\n name=\"extract_tables\",\n display_name=\"Extract Tables\",\n value=True,\n info=\"Extract and format tables from the document\",\n ),\n BoolInput(\n name=\"include_confidence\",\n display_name=\"Include Confidence Scores\",\n value=False,\n advanced=True,\n info=\"Include confidence scores in the extracted text\",\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"Use Concurrent Processing\",\n value=True,\n info=\"Enable concurrent processing of multiple files\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"Number of files to process concurrently\",\n value=2,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Structured Data\", name=\"structured_data\", method=\"load_files\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n self._text_content = \"\"\n\n def get_text_content(self) -> str:\n \"\"\"Return the concatenated text content from all processed pages.\"\"\"\n return self._text_content\n\n def _extract_filename_from_url(self, url: str) -> str:\n \"\"\"Extract filename from URL or generate a default one.\"\"\"\n try:\n logger.debug(f\"Extracting filename from URL: {url}\")\n parsed_url = urlparse(url)\n path = unquote(parsed_url.path)\n filename = os.path.basename(path)\n\n if filename and \".\" in filename:\n logger.debug(f\"Found filename in URL path: {filename}\")\n return filename\n\n response = requests.head(url, allow_redirects=True)\n if \"content-disposition\" in response.headers:\n content_disp = response.headers[\"content-disposition\"]\n if \"filename=\" in content_disp:\n filename = content_disp.split(\"filename=\")[1].strip(\"\\\"'\")\n logger.debug(f\"Found filename in content-disposition: {filename}\")\n return filename\n\n if \"content-type\" in response.headers:\n ext = mimetypes.guess_extension(response.headers[\"content-type\"])\n if ext:\n filename = f\"downloaded{ext}\"\n logger.debug(f\"Generated filename from content-type: {filename}\")\n return filename\n\n logger.debug(\"Using default filename: downloaded.pdf\")\n return \"downloaded.pdf\"\n except Exception as e:\n logger.error(f\"Error extracting filename from URL: {e!s}\")\n return \"downloaded.pdf\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n logger.debug(f\"Attempting to download file from URL: {url}\")\n filename = self._extract_filename_from_url(url)\n local_path = os.path.join(self.temp_dir, filename)\n logger.debug(f\"Local path for download: {local_path}\")\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n def _extract_url_from_input(self, input_data) -> str | None:\n \"\"\"Extract URL string from various input types.\"\"\"\n logger.debug(f\"Extracting URL from input data type: {type(input_data)}\")\n\n # Handle list of Data objects (from blob storage)\n if isinstance(input_data, list):\n logger.debug(f\"Processing list input with {len(input_data)} items\")\n if input_data and isinstance(input_data[0], Data):\n url = input_data[0].data.get(\"file_path\")\n logger.debug(f\"Extracted URL from first Data object in list: {url}\")\n return url\n return None\n\n if isinstance(input_data, str):\n logger.debug(f\"Input is string: {input_data}\")\n return input_data\n elif isinstance(input_data, Data):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from Data object: {url}\")\n return url\n elif hasattr(input_data, \"text\"):\n logger.debug(f\"Extracted URL from text attribute: {input_data.text}\")\n return input_data.text\n elif hasattr(input_data, \"data\"):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from data attribute: {url}\")\n return url\n logger.debug(\"No URL found in input data\")\n return None\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths.\"\"\"\n resolved_files = []\n logger.debug(\"Starting path validation and resolution\")\n\n # Handle URL input if provided\n if hasattr(self, \"url\") and self.url:\n try:\n logger.debug(f\"Processing URL input: {self.url}\")\n # Extract URL from different input types\n url = self._extract_url_from_input(self.url)\n if not url:\n logger.warning(\"No valid URL found in input\")\n return resolved_files\n\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(url)\n )\n finally:\n loop.close()\n\n if local_path:\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": url,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n except Exception as e:\n logger.error(f\"Error processing URL {url}: {e!s}\")\n if not self.silent_errors:\n raise\n\n # Handle file_path input\n file_path = self._file_path_as_list()\n logger.debug(f\"Processing file_path input: {file_path}\")\n for obj in file_path:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n logger.debug(f\"Processing server file path: {server_file_path}\")\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Check if it's a URL\n if isinstance(server_file_path, str) and server_file_path.startswith(\n (\"http://\", \"https://\")\n ):\n logger.debug(f\"Processing URL from file_path: {server_file_path}\")\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(server_file_path)\n )\n finally:\n loop.close()\n\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": server_file_path,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(server_file_path)))\n logger.debug(f\"Resolved local file path: {resolved_path}\")\n if not resolved_path.exists():\n msg = f\"File not found: {server_file_path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {server_file_path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n logger.debug(f\"Resolved {len(resolved_files)} files\")\n return resolved_files\n\n async def process_file(\n self, file_path: str, *, silent_errors: bool = False\n ) -> tuple[Data, str]:\n \"\"\"Process a single file using the OCR service.\"\"\"\n try:\n from langflow.services.deps import get_document_intelligence_service\n \n # Create OCR service directly\n ocr_service = get_document_intelligence_service()\n\n with open(file_path, \"rb\") as file:\n file_content = file.read()\n\n extracted_content,plain_text,document_uuid = await ocr_service.process_document(\n file_content=file_content,\n model_type=self.model_type,\n include_confidence=self.include_confidence,\n extract_tables=self.extract_tables,\n )\n\n structured_data = Data(\n text=plain_text,\n data={\n self.SERVER_FILE_PATH_FIELDNAME: str(file_path),\n \"result\": extracted_content,\n \"document_uuid\":document_uuid\n },\n )\n\n return structured_data, plain_text\n\n except Exception as e:\n logger.error(f\"Error processing file {file_path}: {e!s}\")\n if not silent_errors:\n raise\n return None, \"\"\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process multiple files with concurrent processing.\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n concurrency = (\n 1\n if not self.use_multithreading\n else max(1, self.concurrency_multithreading)\n )\n file_count = len(file_list)\n\n logger.info(f\"Processing {file_count} files with concurrency: {concurrency}\")\n\n all_plain_text = []\n processed_data = []\n\n if concurrency > 1 and file_count > 1:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n with concurrent.futures.ThreadPoolExecutor(\n max_workers=concurrency\n ) as executor:\n future_to_file = {\n executor.submit(\n lambda path: loop.run_until_complete(\n self.process_file(\n str(path), silent_errors=self.silent_errors\n )\n ),\n file.path,\n ): file\n for file in file_list\n }\n for future in concurrent.futures.as_completed(future_to_file):\n try:\n structured_data, plain_text = future.result()\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error in concurrent processing: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n else:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n for file in file_list:\n try:\n structured_data, plain_text = loop.run_until_complete(\n self.process_file(\n str(file.path), silent_errors=self.silent_errors\n )\n )\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n\n # Store concatenated text content\n self._text_content = \"\\n\\n=== NEW DOCUMENT ===\\n\\n\".join(all_plain_text)\n\n return self.rollup_data(file_list, processed_data)\n\n def __del__(self):\n \"\"\"Cleanup temporary files and directory.\"\"\"\n try:\n if hasattr(self, \"temp_dir\") and os.path.exists(self.temp_dir):\n # Remove downloaded files\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.unlink(file_path)\n # Remove the temporary directory\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" }, - "container_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Container", + "concurrency_multithreading": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Processing Concurrency", "dynamic": false, - "info": "Select a container from the storage account", - "name": "container_name", - "options": [], - "options_metadata": [], + "info": "Number of files to process concurrently", + "list": false, + "list_add_label": "Add More", + "name": "concurrency_multithreading", "placeholder": "", - "refresh_button": true, - "required": true, + "required": false, "show": true, "title_case": false, - "toggle": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "genesis-container" + "type": "int", + "value": 2 }, - "file_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "File", + "delete_server_file_after_processing": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Delete Server File After Processing", "dynamic": false, - "info": "Select a file from the container", - "name": "file_name", - "options": [], - "options_metadata": [], + "info": "If true, the Server File Path will be deleted after processing.", + "list": false, + "list_add_label": "Add More", + "name": "delete_server_file_after_processing", "placeholder": "", - "refresh_button": true, - "required": true, + "required": false, "show": true, "title_case": false, - "toggle": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "PriorAuthSample1 (1).pdf" + "type": "bool", + "value": true }, - "return_all_files": { + "extract_tables": { "_input_type": "BoolInput", "advanced": false, - "display_name": "Return All Files", + "display_name": "Extract Tables", "dynamic": false, - "info": "If true and no specific file is selected, returns all files in the container", + "info": "Extract and format tables from the document", "list": false, "list_add_label": "Add More", - "name": "return_all_files", + "name": "extract_tables", "placeholder": "", "required": false, "show": true, @@ -306,305 +278,38 @@ "type": "bool", "value": true }, - "storage_account": { - "_input_type": "StrInput", + "file_path": { + "_input_type": "HandleInput", "advanced": true, - "display_name": "Storage Account", + "display_name": "Server File Path", "dynamic": false, - "info": "Storage Account name", - "list": false, + "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.", + "input_types": [ + "Data", + "Message" + ], + "list": true, "list_add_label": "Add More", - "load_from_db": false, - "name": "storage_account", + "name": "file_path", "placeholder": "", "required": false, "show": true, "title_case": false, - "tool_mode": false, "trace_as_metadata": true, - "type": "str", + "type": "other", "value": "" - } - }, - "tool_mode": false - }, - "showNode": true, - "type": "BlobStorage" - }, - "id": "BlobStorage-dbs9g", - "measured": { - "height": 328, - "width": 320 - }, - "position": { - "x": -62.13541793823242, - "y": 428.0574392756075 - }, - "selected": false, - "type": "genericNode" - }, - { - "data": { - "description": "Identifies and extracts relevant lab results from medical records.", - "display_name": "Lab Values Extraction", - "id": "LabValuesExtraction-GJk2i", - "node": { - "base_classes": [ - "Data" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Identifies and extracts relevant lab results from medical records.", - "display_name": "Lab Values Extraction", - "documentation": "", - "edited": false, - "field_order": [ - "data" - ], - "frozen": false, - "icon": "Autonomize", - "legacy": false, - "lf_version": "1.6.3", - "metadata": {}, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "Data List", - "group_outputs": false, - "method": "parse_data_as_list", - "name": "data_list", - "selected": "Data", - "tool_mode": true, - "types": [ - "Data" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "template": { - "_type": "Component", - "code": { + }, + "ignore_unspecified_files": { + "_input_type": "BoolInput", "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", + "display_name": "Ignore Unspecified Files", + "dynamic": false, + "info": "If true, Data with no 'file_path' property will be ignored.", "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, + "list_add_label": "Add More", + "name": "ignore_unspecified_files", "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "from loguru import logger\n\nfrom langflow.custom import Component\nfrom langflow.helpers.data import data_to_text_list\nfrom langflow.io import DataInput, Output\nfrom langflow.schema import Data\n\n\nclass LabValuesExtraction(Component):\n display_name = \"Lab Values Extraction\"\n description = \"Identifies and extracts relevant lab results from medical records.\"\n icon = \"Autonomize\"\n name = \"LabValuesExtraction\"\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to text.\",\n is_list=True,\n ),\n # MultilineInput(\n # name=\"template\",\n # display_name=\"Template\",\n # info=\"The template to use for formatting the data. \"\n # \"It can contain the keys {text}, {data} or any other key in the Data.\",\n # value=\"{text}\",\n # ),\n # StrInput(name=\"sep\", display_name=\"Separator\", advanced=True, value=\"\\n\"),\n ]\n\n outputs = [\n # Output(\n # display_name=\"Text\",\n # name=\"text\",\n # info=\"Data as a single Message, with each input Data separated by Separator\",\n # method=\"parse_data\",\n # ),\n Output(\n display_name=\"Data List\",\n name=\"data_list\",\n info=\"Data as a list of new Data, each having `text` formatted by Template\",\n method=\"parse_data_as_list\",\n ),\n ]\n\n def _clean_args(self) -> tuple[list[Data], str, str]:\n data = self.data if isinstance(self.data, list) else [self.data]\n return data\n\n def parse_data_as_list(self) -> Data:\n data = self._clean_args()\n text_list, data_list = data_to_text_list(\"{value}\", data)\n for item, text in zip(data_list, text_list, strict=True):\n item.set_text(text)\n extracted_values = self.extract_lab_values(data_list)\n result = Data(value={\"data\": extracted_values})\n return result\n\n def extract_lab_values(self, data) -> list[Data]:\n logger.info(f\"lab values {data}\")\n lab_values = []\n for item in data:\n for prediction in item.data[\"value\"][\"data\"][\"prediction\"]:\n category = prediction[\"Category\"]\n logger.info(f\"category--- {category}\")\n if category in [\"TEST_TREATMENT_PROCEDURE\"]:\n lab_values.append(prediction)\n return lab_values\n" - }, - "data": { - "_input_type": "DataInput", - "advanced": false, - "display_name": "Data", - "dynamic": false, - "info": "The data to convert to text.", - "input_types": [ - "Data" - ], - "list": true, - "list_add_label": "Add More", - "name": "data", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "other", - "value": "" - } - }, - "tool_mode": false - }, - "showNode": true, - "type": "LabValuesExtraction" - }, - "dragging": false, - "id": "LabValuesExtraction-GJk2i", - "measured": { - "height": 181, - "width": 320 - }, - "position": { - "x": 1679.9752033933567, - "y": 505.18767739982127 - }, - "selected": false, - "type": "genericNode" - }, - { - "data": { - "id": "AzureDocumentIntelligence-Zgj5O", - "node": { - "base_classes": [ - "DataFrame" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis", - "display_name": "Azure Document Intelligence", - "documentation": "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/", - "edited": false, - "field_order": [ - "url", - "file_path", - "silent_errors", - "delete_server_file_after_processing", - "ignore_unsupported_extensions", - "ignore_unspecified_files", - "model_type", - "extract_tables", - "include_confidence", - "use_multithreading", - "concurrency_multithreading" - ], - "frozen": false, - "icon": "Azure", - "legacy": false, - "lf_version": "1.6.3", - "metadata": {}, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "Structured Data", - "group_outputs": false, - "method": "load_files", - "name": "structured_data", - "selected": "DataFrame", - "tool_mode": true, - "types": [ - "DataFrame" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "priority": 3, - "template": { - "_type": "Component", - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "\"\"\"Azure Document Intelligence Component - Form recognition and document processing.\"\"\"\n\nimport asyncio\nimport concurrent.futures\nimport mimetypes\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom urllib.parse import unquote, urlparse\n\nimport aiohttp\nimport requests\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output\nfrom langflow.schema.data import Data\nfrom loguru import logger\n\n\nclass AzureDocumentIntelligenceComponent(BaseFileComponent):\n \"\"\"Component for Azure Document Intelligence - advanced document processing and form recognition.\"\"\"\n\n display_name: str = \"Azure Document Intelligence\"\n description: str = \"Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis\"\n documentation: str = \"https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/\"\n icon: str = \"Azure\"\n name: str = \"AzureDocumentIntelligence\"\n category: str = \"models\"\n priority: int = 3 # High priority for document processing\n\n VALID_EXTENSIONS = [\"pdf\", \"jpg\", \"jpeg\", \"png\", \"bmp\", \"tiff\", \"tif\"]\n\n inputs = [\n HandleInput(\n name=\"url\",\n display_name=\"URL\",\n info=\"URL to the document to process\",\n input_types=[\"str\", \"Data\", \"Message\", \"list\"],\n required=False,\n ),\n # Include only the HandleInput and BoolInputs from base_inputs\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n DropdownInput(\n name=\"model_type\",\n display_name=\"Model Type\",\n options=[\"prebuilt-document\", \"prebuilt-read\", \"prebuilt-layout\"],\n value=\"prebuilt-document\",\n info=\"Choose the Form Recognizer model to use\",\n ),\n BoolInput(\n name=\"extract_tables\",\n display_name=\"Extract Tables\",\n value=True,\n info=\"Extract and format tables from the document\",\n ),\n BoolInput(\n name=\"include_confidence\",\n display_name=\"Include Confidence Scores\",\n value=False,\n advanced=True,\n info=\"Include confidence scores in the extracted text\",\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"Use Concurrent Processing\",\n value=True,\n info=\"Enable concurrent processing of multiple files\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"Number of files to process concurrently\",\n value=2,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Structured Data\", name=\"structured_data\", method=\"load_files\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n self._text_content = \"\"\n\n def get_text_content(self) -> str:\n \"\"\"Return the concatenated text content from all processed pages.\"\"\"\n return self._text_content\n\n def _extract_filename_from_url(self, url: str) -> str:\n \"\"\"Extract filename from URL or generate a default one.\"\"\"\n try:\n logger.debug(f\"Extracting filename from URL: {url}\")\n parsed_url = urlparse(url)\n path = unquote(parsed_url.path)\n filename = os.path.basename(path)\n\n if filename and \".\" in filename:\n logger.debug(f\"Found filename in URL path: {filename}\")\n return filename\n\n response = requests.head(url, allow_redirects=True)\n if \"content-disposition\" in response.headers:\n content_disp = response.headers[\"content-disposition\"]\n if \"filename=\" in content_disp:\n filename = content_disp.split(\"filename=\")[1].strip(\"\\\"'\")\n logger.debug(f\"Found filename in content-disposition: {filename}\")\n return filename\n\n if \"content-type\" in response.headers:\n ext = mimetypes.guess_extension(response.headers[\"content-type\"])\n if ext:\n filename = f\"downloaded{ext}\"\n logger.debug(f\"Generated filename from content-type: {filename}\")\n return filename\n\n logger.debug(\"Using default filename: downloaded.pdf\")\n return \"downloaded.pdf\"\n except Exception as e:\n logger.error(f\"Error extracting filename from URL: {e!s}\")\n return \"downloaded.pdf\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n logger.debug(f\"Attempting to download file from URL: {url}\")\n filename = self._extract_filename_from_url(url)\n local_path = os.path.join(self.temp_dir, filename)\n logger.debug(f\"Local path for download: {local_path}\")\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n def _extract_url_from_input(self, input_data) -> str | None:\n \"\"\"Extract URL string from various input types.\"\"\"\n logger.debug(f\"Extracting URL from input data type: {type(input_data)}\")\n\n # Handle list of Data objects (from blob storage)\n if isinstance(input_data, list):\n logger.debug(f\"Processing list input with {len(input_data)} items\")\n if input_data and isinstance(input_data[0], Data):\n url = input_data[0].data.get(\"file_path\")\n logger.debug(f\"Extracted URL from first Data object in list: {url}\")\n return url\n return None\n\n if isinstance(input_data, str):\n logger.debug(f\"Input is string: {input_data}\")\n return input_data\n elif isinstance(input_data, Data):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from Data object: {url}\")\n return url\n elif hasattr(input_data, \"text\"):\n logger.debug(f\"Extracted URL from text attribute: {input_data.text}\")\n return input_data.text\n elif hasattr(input_data, \"data\"):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from data attribute: {url}\")\n return url\n logger.debug(\"No URL found in input data\")\n return None\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths.\"\"\"\n resolved_files = []\n logger.debug(\"Starting path validation and resolution\")\n\n # Handle URL input if provided\n if hasattr(self, \"url\") and self.url:\n try:\n logger.debug(f\"Processing URL input: {self.url}\")\n # Extract URL from different input types\n url = self._extract_url_from_input(self.url)\n if not url:\n logger.warning(\"No valid URL found in input\")\n return resolved_files\n\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(url)\n )\n finally:\n loop.close()\n\n if local_path:\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": url,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n except Exception as e:\n logger.error(f\"Error processing URL {url}: {e!s}\")\n if not self.silent_errors:\n raise\n\n # Handle file_path input\n file_path = self._file_path_as_list()\n logger.debug(f\"Processing file_path input: {file_path}\")\n for obj in file_path:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n logger.debug(f\"Processing server file path: {server_file_path}\")\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Check if it's a URL\n if isinstance(server_file_path, str) and server_file_path.startswith(\n (\"http://\", \"https://\")\n ):\n logger.debug(f\"Processing URL from file_path: {server_file_path}\")\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(server_file_path)\n )\n finally:\n loop.close()\n\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": server_file_path,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(server_file_path)))\n logger.debug(f\"Resolved local file path: {resolved_path}\")\n if not resolved_path.exists():\n msg = f\"File not found: {server_file_path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {server_file_path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n logger.debug(f\"Resolved {len(resolved_files)} files\")\n return resolved_files\n\n async def process_file(\n self, file_path: str, *, silent_errors: bool = False\n ) -> tuple[Data, str]:\n \"\"\"Process a single file using the OCR service.\"\"\"\n try:\n from langflow.services.deps import get_document_intelligence_service\n \n # Create OCR service directly\n ocr_service = get_document_intelligence_service()\n\n with open(file_path, \"rb\") as file:\n file_content = file.read()\n\n extracted_content, plain_text = await ocr_service.process_document(\n file_content=file_content,\n model_type=self.model_type,\n include_confidence=self.include_confidence,\n extract_tables=self.extract_tables,\n )\n\n structured_data = Data(\n text=plain_text,\n data={\n self.SERVER_FILE_PATH_FIELDNAME: str(file_path),\n \"result\": extracted_content,\n },\n )\n\n return structured_data, plain_text\n\n except Exception as e:\n logger.error(f\"Error processing file {file_path}: {e!s}\")\n if not silent_errors:\n raise\n return None, \"\"\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process multiple files with concurrent processing.\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n concurrency = (\n 1\n if not self.use_multithreading\n else max(1, self.concurrency_multithreading)\n )\n file_count = len(file_list)\n\n logger.info(f\"Processing {file_count} files with concurrency: {concurrency}\")\n\n all_plain_text = []\n processed_data = []\n\n if concurrency > 1 and file_count > 1:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n with concurrent.futures.ThreadPoolExecutor(\n max_workers=concurrency\n ) as executor:\n future_to_file = {\n executor.submit(\n lambda path: loop.run_until_complete(\n self.process_file(\n str(path), silent_errors=self.silent_errors\n )\n ),\n file.path,\n ): file\n for file in file_list\n }\n for future in concurrent.futures.as_completed(future_to_file):\n try:\n structured_data, plain_text = future.result()\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error in concurrent processing: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n else:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n for file in file_list:\n try:\n structured_data, plain_text = loop.run_until_complete(\n self.process_file(\n str(file.path), silent_errors=self.silent_errors\n )\n )\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n\n # Store concatenated text content\n self._text_content = \"\\n\\n=== NEW DOCUMENT ===\\n\\n\".join(all_plain_text)\n\n return self.rollup_data(file_list, processed_data)\n\n def __del__(self):\n \"\"\"Cleanup temporary files and directory.\"\"\"\n try:\n if hasattr(self, \"temp_dir\") and os.path.exists(self.temp_dir):\n # Remove downloaded files\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.unlink(file_path)\n # Remove the temporary directory\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" - }, - "concurrency_multithreading": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Processing Concurrency", - "dynamic": false, - "info": "Number of files to process concurrently", - "list": false, - "list_add_label": "Add More", - "name": "concurrency_multithreading", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 2 - }, - "delete_server_file_after_processing": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Delete Server File After Processing", - "dynamic": false, - "info": "If true, the Server File Path will be deleted after processing.", - "list": false, - "list_add_label": "Add More", - "name": "delete_server_file_after_processing", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "extract_tables": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Extract Tables", - "dynamic": false, - "info": "Extract and format tables from the document", - "list": false, - "list_add_label": "Add More", - "name": "extract_tables", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "file_path": { - "_input_type": "HandleInput", - "advanced": true, - "display_name": "Server File Path", - "dynamic": false, - "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.", - "input_types": [ - "Data", - "Message" - ], - "list": true, - "list_add_label": "Add More", - "name": "file_path", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "other", - "value": "" - }, - "ignore_unspecified_files": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Ignore Unspecified Files", - "dynamic": false, - "info": "If true, Data with no 'file_path' property will be ignored.", - "list": false, - "list_add_label": "Add More", - "name": "ignore_unspecified_files", - "placeholder": "", - "required": false, + "required": false, "show": true, "title_case": false, "tool_mode": false, @@ -739,22 +444,21 @@ "showNode": true, "type": "AzureDocumentIntelligence" }, - "dragging": false, - "id": "AzureDocumentIntelligence-Zgj5O", + "id": "AzureDocumentIntelligence-ZfhxX", "measured": { - "height": 364, + "height": 365, "width": 320 }, "position": { - "x": 397.42698323021114, - "y": 453.95185932629363 + "x": 1182.2467548657498, + "y": 558.571523636203 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "ParserComponent-C3bHX", + "id": "ParserComponent-0LmpJ", "node": { "base_classes": [ "Message" @@ -776,7 +480,6 @@ "frozen": false, "icon": "braces", "key": "ParserComponent", - "last_updated": "2025-10-15T10:35:10.716Z", "legacy": false, "lf_version": "1.6.3", "metadata": {}, @@ -859,79 +562,732 @@ "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "tab", - "value": "Parser" + "type": "tab", + "value": "Parser" + }, + "pattern": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Template", + "dynamic": true, + "info": "Use variables within curly brackets to extract column values for DataFrames or key values for Data.For example: `Name: {Name}, Age: {Age}, Country: {Country}`", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "pattern", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "{text}" + }, + "sep": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Separator", + "dynamic": false, + "info": "String used to separate rows/items.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "sep", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "\n" + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "ParserComponent" + }, + "id": "ParserComponent-0LmpJ", + "measured": { + "height": 329, + "width": 320 + }, + "position": { + "x": 1546.4717455363013, + "y": 640.6202609077544 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "Agent-8SCLe", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Define the agent's instructions, then enter a task to complete using tools.", + "display_name": "Agent", + "documentation": "https://docs.langflow.org/agents", + "edited": false, + "field_order": [ + "agent_llm", + "max_tokens", + "model_kwargs", + "model_name", + "openai_api_base", + "api_key", + "temperature", + "seed", + "max_retries", + "timeout", + "system_prompt", + "n_messages", + "format_instructions", + "output_schema", + "tools", + "input_value", + "handle_parsing_errors", + "verbose", + "max_iterations", + "agent_description", + "add_current_date_tool" + ], + "frozen": false, + "icon": "bot", + "last_updated": "2025-10-29T10:52:54.074Z", + "legacy": false, + "lf_version": "1.6.3", + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Response", + "group_outputs": false, + "method": "message_response", + "name": "response", + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "add_current_date_tool": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Current Date", + "dynamic": false, + "info": "If true, will add a tool to the agent that returns the current date.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "add_current_date_tool", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "agent_description": { + "_input_type": "MultilineInput", + "advanced": true, + "copy_field": false, + "display_name": "Agent Description [Deprecated]", + "dynamic": false, + "info": "The description of the agent. This is only used when in Tool Mode. Defaults to 'A helpful assistant with access to the following tools:' and tools are added dynamically. This feature is deprecated and will be removed in future versions.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "agent_description", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "A helpful assistant with access to the following tools:" + }, + "agent_llm": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model Provider", + "dynamic": false, + "external_options": { + "fields": { + "data": { + "node": { + "display_name": "Connect other models", + "icon": "CornerDownLeft", + "name": "connect_other_models" + } + } + } + }, + "info": "The provider of the language model that the agent will use to generate responses.", + "input_types": [], + "name": "agent_llm", + "options": [ + "Anthropic", + "Google Generative AI", + "OpenAI", + "Azure OpenAI" + ], + "options_metadata": [ + { + "icon": "Anthropic" + }, + { + "icon": "GoogleGenerativeAI" + }, + { + "icon": "OpenAI" + }, + { + "icon": "Azure" + }, + { + "icon": "brain" + } + ], + "placeholder": "", + "real_time_refresh": true, + "refresh_button": false, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "Azure OpenAI" + }, + "api_key": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "OpenAI API Key", + "dynamic": false, + "info": "The OpenAI API Key to use for the OpenAI model.", + "input_types": [], + "load_from_db": true, + "name": "api_key", + "password": true, + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "" + }, + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import (\n ToolCallingAgentComponent,\n)\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n IntInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\nfrom langflow.custom.default_providers import apply_provider_defaults\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"OpenAI\", \"Azure OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n if \"OpenAI\" in MODEL_PROVIDERS_DICT:\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n else:\n openai_inputs_filtered = []\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST],\n value=\"OpenAI\",\n real_time_refresh=True,\n refresh_button=False,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST if key in MODELS_METADATA]\n + [{\"icon\": \"brain\"}],\n external_options={\n \"fields\": {\n \"data\": {\n \"node\": {\n \"name\": \"connect_other_models\",\n \"display_name\": \"Connect other models\",\n \"icon\": \"CornerDownLeft\",\n }\n }\n },\n },\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\n \"true\",\n \"1\",\n \"t\",\n \"y\",\n \"yes\",\n ]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (\n ExceptionWithMessageError,\n ValueError,\n TypeError,\n RuntimeError,\n ) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (\n ExceptionWithMessageError,\n ValueError,\n TypeError,\n NotImplementedError,\n AttributeError,\n ) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(\n session_id=self.graph.session_id,\n order=\"Ascending\",\n n_messages=self.n_messages,\n )\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n \n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n build_config.update(fields_to_add)\n \n # Apply provider-specific defaults (only for Azure OpenAI currently)\n if field_value == \"Azure OpenAI\":\n build_config = apply_provider_defaults(field_value, build_config)\n \n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n \n elif field_value == \"connect_other_models\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST],\n real_time_refresh=True,\n refresh_button=False,\n input_types=[\"LanguageModel\"],\n placeholder=\"Awaiting model input.\",\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST if key in MODELS_METADATA],\n external_options={\n \"fields\": {\n \"data\": {\n \"node\": {\n \"name\": \"connect_other_models\",\n \"display_name\": \"Connect other models\",\n \"icon\": \"CornerDownLeft\",\n },\n }\n },\n },\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n \n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n \n # Rest of your existing method remains unchanged...\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\",\n tool_description=description,\n callbacks=self.get_langchain_callbacks(),\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n" + }, + "format_instructions": { + "_input_type": "MultilineInput", + "advanced": true, + "copy_field": false, + "display_name": "Output Format Instructions", + "dynamic": false, + "info": "Generic Template for structured output formatting. Valid only with Structured response.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "format_instructions", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "You are an AI that extracts structured JSON objects from unstructured text. Use a predefined schema with expected types (str, int, float, bool, dict). Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. Fill missing or ambiguous values with defaults: null for missing values. Remove exact duplicates but keep variations that have different field values. Always return valid JSON in the expected format, never throw errors. If multiple objects can be extracted, return them all in the structured format." + }, + "handle_parsing_errors": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Handle Parse Errors", + "dynamic": false, + "info": "Should the Agent fix errors when reading user input for better processing?", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "handle_parsing_errors", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "input_value": { + "_input_type": "MessageInput", + "advanced": false, + "display_name": "Input", + "dynamic": false, + "info": "The input provided by the user for the agent to process.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "input_value", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": true, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "max_iterations": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Iterations", + "dynamic": false, + "info": "The maximum number of attempts the agent can make to complete its task before it stops.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "max_iterations", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 15 + }, + "max_retries": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Retries", + "dynamic": false, + "info": "The maximum number of retries to make when generating.", + "list": false, + "list_add_label": "Add More", + "name": "max_retries", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 5 + }, + "max_tokens": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Tokens", + "dynamic": false, + "info": "The maximum number of tokens to generate. Set to 0 for unlimited tokens.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "max_tokens", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 0 + }, + "model_kwargs": { + "_input_type": "DictInput", + "advanced": true, + "display_name": "Model Kwargs", + "dynamic": false, + "info": "Additional keyword arguments to pass to the model.", + "list": false, + "list_add_label": "Add More", + "name": "model_kwargs", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "type": "dict", + "value": {} + }, + "model_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": true, + "dialog_inputs": {}, + "display_name": "Model Name", + "dynamic": false, + "external_options": {}, + "info": "To see the model names, first choose a provider. Then, enter your API key and click the refresh button next to the model name.", + "name": "model_name", + "options": [ + "gpt-4o-mini", + "gpt-4o", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4-turbo", + "gpt-4-turbo-preview", + "gpt-4", + "gpt-3.5-turbo", + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-chat-latest", + "o1", + "o3-mini", + "o3", + "o3-pro", + "o4-mini", + "o4-mini-high" + ], + "options_metadata": [], + "placeholder": "", + "real_time_refresh": false, + "required": false, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "gpt-4o-mini" + }, + "n_messages": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Number of Chat History Messages", + "dynamic": false, + "info": "Number of chat history messages to retrieve.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "n_messages", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 100 + }, + "openai_api_base": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "OpenAI API Base", + "dynamic": false, + "info": "The base URL of the OpenAI API. Defaults to https://api.openai.com/v1. You can change this to use other APIs like JinaChat, LocalAI and Prem.", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "openai_api_base", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "output_schema": { + "_input_type": "TableInput", + "advanced": true, + "display_name": "Output Schema", + "dynamic": false, + "info": "Schema Validation: Define the structure and data types for structured output. No validation if no output schema.", + "input_types": [], + "is_list": true, + "list_add_label": "Add More", + "name": "output_schema", + "placeholder": "", + "required": false, + "show": true, + "table_icon": "Table", + "table_schema": { + "columns": [ + { + "default": "field", + "description": "Specify the name of the output field.", + "disable_edit": false, + "display_name": "Name", + "edit_mode": "inline", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "name", + "sortable": true, + "type": "str" + }, + { + "default": "description of field", + "description": "Describe the purpose of the output field.", + "disable_edit": false, + "display_name": "Description", + "edit_mode": "popover", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "description", + "sortable": true, + "type": "str" + }, + { + "default": "str", + "description": "Indicate the data type of the output field (e.g., str, int, float, bool, dict).", + "disable_edit": false, + "display_name": "Type", + "edit_mode": "inline", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "type", + "options": [ + "str", + "int", + "float", + "bool", + "dict" + ], + "sortable": true, + "type": "str" + }, + { + "default": false, + "description": "Set to True if this output field should be a list of the specified type.", + "disable_edit": false, + "display_name": "As List", + "edit_mode": "inline", + "filterable": true, + "formatter": "boolean", + "hidden": false, + "name": "multiple", + "sortable": true, + "type": "boolean" + } + ] + }, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "trigger_icon": "Table", + "trigger_text": "Open table", + "type": "table", + "value": [] + }, + "seed": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Seed", + "dynamic": false, + "info": "The seed controls the reproducibility of the job.", + "list": false, + "list_add_label": "Add More", + "name": "seed", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 1 + }, + "system_prompt": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Agent Instructions", + "dynamic": false, + "info": "System Prompt: Initial instructions and context provided to guide the agent's behavior.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "system_prompt", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "You are a clinical lab data specialist. Extract lab tests and procedures from medical documents.\n\n## WORKFLOW\n\n**Step 1: Extract Entities**\n- Call the clinical entity extraction tool with the user's raw text\n- Receive JSON: `{\"value\": {\"data\": {\"prediction\": [...]}}}`\n\n**Step 2: Filter Results**\n- From the prediction array, keep ONLY entities where `Category == \"TEST_TREATMENT_PROCEDURE\"`\n- Report these filtered entities\n\n## FILTERING RULE\n```python\nfiltered = [e for e in prediction if e[\"Category\"] == \"TEST_TREATMENT_PROCEDURE\"]\n```\n\n## OUTPUT FORMAT\n\n**Summary:**\n- Total entities: [count]\n- Lab procedures found: [filtered count]\n\n**Procedures/Tests:**\n- [Text] - Type: [Type], Confidence: [Score]%\n\n## CRITICAL RULES\n\n1. ALWAYS call entity extraction tool first\n2. ONLY report TEST_TREATMENT_PROCEDURE entities\n3. DO NOT skip the tool call\n4. DO NOT use your own knowledge\n" + }, + "temperature": { + "_input_type": "SliderInput", + "advanced": true, + "display_name": "Temperature", + "dynamic": false, + "info": "", + "input_types": [], + "max_label": "", + "max_label_icon": "", + "min_label": "", + "min_label_icon": "", + "name": "temperature", + "placeholder": "", + "range_spec": { + "max": 2, + "min": 0, + "step": 0.01, + "step_type": "float" + }, + "required": false, + "show": true, + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "title_case": false, + "tool_mode": false, + "type": "slider", + "value": 0.08 + }, + "timeout": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Timeout", + "dynamic": false, + "info": "The timeout for requests to OpenAI completion API.", + "list": false, + "list_add_label": "Add More", + "name": "timeout", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 700 }, - "pattern": { - "_input_type": "MultilineInput", + "tools": { + "_input_type": "HandleInput", "advanced": false, - "copy_field": false, - "display_name": "Template", - "dynamic": true, - "info": "Use variables within curly brackets to extract column values for DataFrames or key values for Data.For example: `Name: {Name}, Age: {Age}, Country: {Country}`", + "display_name": "Tools", + "dynamic": false, + "info": "These are the tools that the agent can use to help with tasks.", "input_types": [ - "Message" + "Tool" ], - "list": false, + "list": true, "list_add_label": "Add More", - "load_from_db": false, - "multiline": true, - "name": "pattern", + "name": "tools", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "str", - "value": "Text: {text}" + "type": "other", + "value": "" }, - "sep": { - "_input_type": "MessageTextInput", + "verbose": { + "_input_type": "BoolInput", "advanced": true, - "display_name": "Separator", + "display_name": "Verbose", "dynamic": false, - "info": "String used to separate rows/items.", - "input_types": [ - "Message" - ], + "info": "", + "input_types": [], "list": false, "list_add_label": "Add More", - "load_from_db": false, - "name": "sep", + "name": "verbose", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "str", - "value": "\n" + "type": "bool", + "value": true } }, "tool_mode": false }, "showNode": true, - "type": "ParserComponent" + "type": "Agent" }, - "dragging": false, - "id": "ParserComponent-C3bHX", + "id": "Agent-8SCLe", "measured": { - "height": 327, + "height": 759, "width": 320 }, "position": { - "x": 818.8829060931005, - "y": 434.1186394268635 + "x": 2088.8103842443415, + "y": 321.24995821689436 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "AutonomizeModel-UO0Et", + "id": "AutonomizeModel-KQNLA", "node": { "base_classes": [ "Data" @@ -949,6 +1305,7 @@ ], "frozen": false, "icon": "Autonomize", + "last_updated": "2025-10-29T10:52:54.075Z", "legacy": false, "lf_version": "1.6.3", "metadata": {}, @@ -958,14 +1315,17 @@ { "allows_loop": false, "cache": true, - "display_name": "Model Output", + "display_name": "Toolset", "group_outputs": false, - "method": "build_output", - "name": "prediction", - "selected": "Data", + "hidden": null, + "method": "to_toolkit", + "name": "component_as_tool", + "options": null, + "required_inputs": null, + "selected": "Tool", "tool_mode": true, "types": [ - "Data" + "Tool" ], "value": "__UNDEFINED__" } @@ -1015,7 +1375,7 @@ "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "Text: PRIOR AUTHORIZATION REQUEST FORM Instructions: Please fill out all applicable sections on both pages completely and legibly. Attach any additional documentation that is important for the review, e.g. chart notes or lab data, to support the prior authorization request. Plan Member Name: Alex Smith Member Information DOB: 06/26/2013 Today's date: Member ID: 123987456 Member Phone Number: Service Type Elective/Routine Expedited/Urgent *** Clinical notes and supporting documentation are REQUIRED to review for medical necessity. \"\"\" Referral/Service Type Requested Inpatient: ** Office: ** DME ER Admits SNF Custodial Acute Inpatient Rehab Inpatient Detox Ventilator Services Surgical Procedure Diagnostic Procedure Infusion Therapy Office Procedure/Visit Wheelchair Enteral Formula Prosthetic Other ** Home Health: Skilled Services Home Infusion Speech Therapy Physical Therapy Occupational Therapy Out-of-State request Procedure Information *Diagnosis Code & Description: M41. 9 Scoliosis For Internal Use: *CPT/HCPC Code & Description: 72149 - Diagnostic Radiology Procedures of the Spine *J Code/Description/Dose/NDC: *Number of visits/days/units requested (circle type and specific quantity): Dates of Service: From: 12/01/2023 To: 12/02/2023 Requesting Provider Information *Name/Credentials: Dr. Jason Momoa *Address: 123 Mock Street, Austin, CA 78727 Contact Name: *Billing NPI: 1235920999 *Phone No: 5551123123 Fax No: *Billing TIN: 8882231234 Servicing Provider/Facility Information *Name: Memorial Hospital *Address: 1204, Mason Street, Austin, CA 79723 Contact Name: *Servicing NPI: 1293801999 *Phone No: 5551123121 Fax No: *Servicing TIN: 9882231234 Patient Information: . Name: Alex Smith · Date of Birth: 06/26/2013 · Gender: Male . Insurance ID Number: 123987456 . Policy Holder's Name: Jason Smith · Relationship to Policy Holder: Son END OF PAGE · Address: 123 Mock Street, Sample City, State, Zip Code Provider Information: . Requesting Provider's Name: Dr. Jason Momoa · Requesting Provider NPI : 1235920999 · Provider's Contact Information: Phone - (123) 456-7890, Fax - (123) 456-7891 · Servicing Provider's Name: Memorial Hospital · Servicing Provider NPI: 1293801999 Presenting Complaint: . The patient was brought in by parents due to concerns about noticeable curvature of the spine. Clinical History: · Initial Observation: The patient exhibited postural asymmetry during a routine check-up. · Family History: No significant family history of spinal deformities. Physical Examination Findings: · Spinal Examination: An unusual curvature in the spine was observed. The curve deviates to the right in the lumbar region, indicating an atypical curvature. · Neurological Assessment: Subtle signs of neurological involvement were noted, including mild coordination issues and reflex asymmetry. Diagnostic Imaging: . X-Ray Findings: The X-ray of the spine confirmed the presence of a right lumbar curve, categorizing it as an atypical scoliotic curve. · Plan: . Advanced Imaging: Recommend an MRI of the spine to better characterize the curvature, assess for any structural abnormalities, and rule out any intraspinal pathology, such as a syrinx or tumor, which could be contributing to the atypical scoliosis presentation. · Referral to Specialist: Refer to a pediatric orthopedic surgeon specializing in spinal deformities for further evaluation and management. A neurology consultation may also be considered, given the neurological findings. . Monitoring and Follow-Up: Schedule regular follow-up appointments to monitor the progression of the curvature and assess the impact of any interventions. . Physical Therapy: Initiate physical therapy focusing on strengthening and flexibility exercises tailored for scoliosis patients. · Education and Support: Provide education to the patient and parents about scoliosis, its potential impacts, treatment options, and lifestyle modifications. Encourage participation in a support group for young patients with scoliosis, if available" + "value": "" }, "selected_model": { "_input_type": "DropdownInput", @@ -1046,64 +1406,103 @@ "trace_as_metadata": true, "type": "str", "value": "Clinical LLM" + }, + "tools_metadata": { + "_input_type": "ToolsInput", + "advanced": false, + "display_name": "Actions", + "dynamic": false, + "info": "Modify tool names and descriptions to help agents understand when to use each tool.", + "is_list": true, + "list_add_label": "Add More", + "name": "tools_metadata", + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "tools", + "value": [ + { + "args": { + "search_query": { + "default": "", + "description": "Input text to process with the selected model", + "title": "Search Query", + "type": "string" + } + }, + "description": "used to extract clinical entities from the text", + "display_description": "Unified interface for Autonomize text-based AI models with dropdown selection", + "display_name": "build_output", + "name": "clinical_entity_extractor", + "readonly": false, + "status": true, + "tags": [ + "build_output" + ] + } + ] } }, - "tool_mode": false + "tool_mode": true }, "showNode": true, "type": "AutonomizeModel" }, - "dragging": false, - "id": "AutonomizeModel-UO0Et", + "id": "AutonomizeModel-KQNLA", "measured": { - "height": 302, + "height": 300, "width": 320 }, "position": { - "x": 1291.5746470295176, - "y": 691.9504981194548 + "x": 1336.9314787549552, + "y": 196 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "ParserComponent-ep9UC", + "id": "ChatOutput-ql59o", "node": { "base_classes": [ "Message" ], "beta": false, - "category": "processing", + "category": "input_output", "conditional_paths": [], "custom_fields": {}, - "description": "Extracts text using a template.", - "display_name": "Parser", - "documentation": "https://docs.langflow.org/components-processing#parser", + "description": "Display a chat message in the Playground.", + "display_name": "Chat Output", + "documentation": "https://docs.langflow.org/components-io#chat-output", "edited": false, "field_order": [ - "input_data", - "mode", - "pattern", - "sep" + "input_value", + "should_store_message", + "sender", + "sender_name", + "session_id", + "data_template" ], "frozen": false, - "icon": "braces", - "key": "ParserComponent", - "last_updated": "2025-10-15T10:44:08.064Z", + "icon": "MessagesSquare", + "key": "ChatOutput", "legacy": false, "lf_version": "1.6.3", "metadata": {}, - "minimized": false, + "minimized": true, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Parsed Text", + "display_name": "Output Message", "group_outputs": false, - "method": "parse_combined_text", - "name": "parsed_text", + "method": "message_response", + "name": "message", "selected": "Message", "tool_mode": true, "types": [ @@ -1113,7 +1512,7 @@ } ], "pinned": false, - "score": 0.001, + "score": 0.003169567463043492, "template": { "_type": "Component", "code": { @@ -1132,21 +1531,45 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/components-processing#parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n formatted_text = self.pattern.format(**data.data)\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n" + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom langflow.base.io.chat import ChatComponent\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.schema.properties import Source\nfrom langflow.template.field.base import Output\nfrom langflow.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, icon, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, - "input_data": { + "data_template": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Data Template", + "dynamic": false, + "info": "Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "data_template", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "{text}" + }, + "input_value": { "_input_type": "HandleInput", "advanced": false, - "display_name": "Data or DataFrame", + "display_name": "Inputs", "dynamic": false, - "info": "Accepts either a DataFrame or a Data object.", + "info": "Message to be passed as output.", "input_types": [ + "Data", "DataFrame", - "Data" + "Message" ], "list": false, "list_add_label": "Add More", - "name": "input_data", + "name": "input_value", "placeholder": "", "required": true, "show": true, @@ -1155,65 +1578,67 @@ "type": "other", "value": "" }, - "mode": { - "_input_type": "TabInput", - "advanced": false, - "display_name": "Mode", + "sender": { + "_input_type": "DropdownInput", + "advanced": true, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Sender Type", "dynamic": false, - "info": "Convert into raw string instead of using a template.", - "name": "mode", + "external_options": {}, + "info": "Type of sender.", + "name": "sender", "options": [ - "Parser", - "Stringify" + "Machine", + "User" ], + "options_metadata": [], "placeholder": "", - "real_time_refresh": true, "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, - "type": "tab", - "value": "Stringify" + "type": "str", + "value": "Machine" }, - "pattern": { - "_input_type": "MultilineInput", - "advanced": false, - "copy_field": false, - "display_name": "Template", - "dynamic": true, - "info": "Use variables within curly brackets to extract column values for DataFrames or key values for Data.For example: `Name: {Name}, Age: {Age}, Country: {Country}`", + "sender_name": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Sender Name", + "dynamic": false, + "info": "Name of the sender.", "input_types": [ "Message" ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "multiline": true, - "name": "pattern", + "name": "sender_name", "placeholder": "", - "required": true, - "show": false, + "required": false, + "show": true, "title_case": false, "tool_mode": false, "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "" + "value": "AI" }, - "sep": { + "session_id": { "_input_type": "MessageTextInput", "advanced": true, - "display_name": "Separator", + "display_name": "Session ID", "dynamic": false, - "info": "String used to separate rows/items.", + "info": "The session ID of the chat. If empty, the current session ID parameter will be used.", "input_types": [ "Message" ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "sep", + "name": "session_id", "placeholder": "", "required": false, "show": true, @@ -1222,48 +1647,69 @@ "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "\n" + "value": "" + }, + "should_store_message": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Store Messages", + "dynamic": false, + "info": "Store the message in the history.", + "list": false, + "list_add_label": "Add More", + "name": "should_store_message", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true } }, "tool_mode": false }, - "showNode": true, - "type": "ParserComponent" + "showNode": false, + "type": "ChatOutput" }, - "dragging": false, - "id": "ParserComponent-ep9UC", + "id": "ChatOutput-ql59o", "measured": { - "height": 245, - "width": 320 + "height": 48, + "width": 192 }, "position": { - "x": 2141.0975660551053, - "y": 720.0475596436473 + "x": 2450.712841397065, + "y": 557.3692648499549 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "TextOutput-4q9JH", + "id": "BlobStorage-ElgWO", "node": { "base_classes": [ - "Message" + "Data" ], "beta": false, "category": "input_output", "conditional_paths": [], "custom_fields": {}, - "description": "Sends text output via API.", - "display_name": "Text Output", - "documentation": "https://docs.langflow.org/components-io#text-output", + "description": "Load files from Azure Blob Storage", + "display_name": "Blob Storage", + "documentation": "http://docs.langflow.org/components/storage", "edited": false, "field_order": [ - "input_value" + "storage_account", + "container_name", + "file_name", + "return_all_files" ], "frozen": false, - "icon": "type", - "key": "TextOutput", + "icon": "Autonomize", + "key": "BlobStorage", + "last_updated": "2025-10-28T20:15:03.077Z", "legacy": false, "lf_version": "1.6.3", "metadata": {}, @@ -1273,20 +1719,20 @@ { "allows_loop": false, "cache": true, - "display_name": "Output Text", + "display_name": "File Path", "group_outputs": false, - "method": "text_response", - "name": "text", - "selected": "Message", + "method": "get_file_paths", + "name": "file_path", + "selected": "Data", "tool_mode": true, "types": [ - "Message" + "Data" ], "value": "__UNDEFINED__" } ], "pinned": false, - "score": 0.002151957098264304, + "score": 0.2945640631554785, "template": { "_type": "Component", "code": { @@ -1305,29 +1751,87 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.base.io.text import TextComponent\nfrom langflow.io import MultilineInput, Output\nfrom langflow.schema.message import Message\n\n\nclass TextOutputComponent(TextComponent):\n display_name = \"Text Output\"\n description = \"Sends text output via API.\"\n documentation: str = \"https://docs.langflow.org/components-io#text-output\"\n icon = \"type\"\n name = \"TextOutput\"\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Text to be passed as output.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n message = Message(\n text=self.input_value,\n )\n self.status = self.input_value\n return message\n" + "value": "\"\"\"Blob Storage Component for loading files from Azure Blob Storage.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.services.deps import get_flexstore_service\nfrom loguru import logger\n\n\nclass BlobStorageComponent(Component):\n display_name = \"Blob Storage\"\n category: str = \"input_output\"\n description = \"Load files from Azure Blob Storage\"\n documentation = \"http://docs.langflow.org/components/storage\"\n icon = \"Autonomize\"\n name = \"BlobStorage\"\n\n # Match the property name expected by FileComponent\n FILE_PATH_FIELD = \"file_path\"\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._container_list: list[str] = []\n self._file_list: list[str] = []\n\n inputs = [\n StrInput(\n name=\"storage_account\",\n display_name=\"Storage Account\",\n required=False,\n info=\"Storage Account name\",\n advanced=True,\n ),\n DropdownInput(\n name=\"container_name\",\n display_name=\"Container\",\n info=\"Select a container from the storage account\",\n required=True,\n refresh_button=True,\n ),\n DropdownInput(\n name=\"file_name\",\n display_name=\"File\",\n info=\"Select a file from the container\",\n required=True,\n refresh_button=True,\n ),\n BoolInput(\n name=\"return_all_files\",\n display_name=\"Return All Files\",\n info=\"If true and no specific file is selected, returns all files in the container\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"file_path\", # Match the property name expected by FileComponent\n display_name=\"File Path\",\n method=\"get_file_paths\",\n ),\n ]\n\n async def update_build_config(\n self, build_config: dict, field_value: Any, field_name: str | None = None\n ):\n \"\"\"Update the build configuration based on field changes.\"\"\"\n logger.info(f\"update_build_config called with field_name: {field_name}\")\n\n storage_account = getattr(self, \"storage_account\", None)\n container_name = getattr(self, \"container_name\", None)\n\n if field_name == \"container_name\":\n try:\n # Load the container options when the field is refreshed\n service = get_flexstore_service()\n self._container_list = await service.get_containers(storage_account)\n\n build_config[\"container_name\"][\"options\"] = self._container_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating container list: {e!s}\")\n raise\n\n elif field_name == \"file_name\" and container_name:\n try:\n # Load the file options when the field is refreshed\n service = get_flexstore_service()\n self._file_list = await service.get_files(\n storage_account, container_name\n )\n\n build_config[\"file_name\"][\"options\"] = self._file_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating file list: {e!s}\")\n raise\n\n return build_config\n\n async def get_file_paths(self) -> list[Data]:\n \"\"\"Get file paths for the FileComponent to process.\"\"\"\n try:\n if not self.container_name:\n logger.warning(\"Container name is required.\")\n return []\n\n service = get_flexstore_service()\n file_paths = []\n\n # If a specific file is selected\n if self.file_name:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, self.file_name\n )\n if signed_url:\n file_paths = [Data(data={self.FILE_PATH_FIELD: signed_url})]\n # If no specific file is selected and return_all_files is True\n elif self.return_all_files:\n files = await service.get_files(\n self.storage_account, self.container_name\n )\n for file in files:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, file\n )\n if signed_url:\n file_paths.append(Data(data={self.FILE_PATH_FIELD: signed_url}))\n\n if file_paths:\n self.status = file_paths\n logger.info(f\"Generated {len(file_paths)} file paths\")\n for path in file_paths:\n logger.debug(f\"File path: {path.data.get(self.FILE_PATH_FIELD)}\")\n else:\n logger.warning(\"No file paths generated\")\n\n return file_paths\n\n except Exception as e:\n logger.error(f\"Error in get_file_paths: {e!s}\")\n return []\n" }, - "input_value": { - "_input_type": "MultilineInput", + "container_name": { + "_input_type": "DropdownInput", "advanced": false, - "copy_field": false, - "display_name": "Inputs", + "combobox": false, + "dialog_inputs": {}, + "display_name": "Container", "dynamic": false, - "info": "Text to be passed as output.", - "input_types": [ - "Message" - ], + "external_options": {}, + "info": "Select a container from the storage account", + "name": "container_name", + "options": [], + "options_metadata": [], + "placeholder": "", + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "genesis-platform-demo" + }, + "file_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "File", + "dynamic": false, + "external_options": {}, + "info": "Select a file from the container", + "name": "file_name", + "options": [], + "options_metadata": [], + "placeholder": "", + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "discharge_summary.pdf" + }, + "return_all_files": { + "_input_type": "BoolInput", + "advanced": false, + "display_name": "Return All Files", + "dynamic": false, + "info": "If true and no specific file is selected, returns all files in the container", + "list": false, + "list_add_label": "Add More", + "name": "return_all_files", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "storage_account": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Storage Account", + "dynamic": false, + "info": "Storage Account name", "list": false, "list_add_label": "Add More", "load_from_db": false, - "multiline": true, - "name": "input_value", + "name": "storage_account", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, "type": "str", "value": "" @@ -1336,34 +1840,33 @@ "tool_mode": false }, "showNode": true, - "type": "TextOutput" + "type": "BlobStorage" }, - "dragging": false, - "id": "TextOutput-4q9JH", + "id": "BlobStorage-ElgWO", "measured": { - "height": 203, + "height": 329, "width": 320 }, "position": { - "x": 2841.8713358349687, - "y": 528.3264339491566 + "x": 838.359375, + "y": 197.68153235521322 }, - "selected": true, + "selected": false, "type": "genericNode" } ], "viewport": { - "x": -215.52134749806373, - "y": 37.16692952453377, - "zoom": 0.5275243383938776 + "x": -339.53316099757126, + "y": 98.76147531939824, + "zoom": 0.4539021955799936 } }, "description": "Identifies and extracts relevant lab results from medical records, organising them into structured data for quick reference and analysis", "endpoint_name": null, - "id": "1943ff8b-3898-431a-91a7-2e95ccd06724", + "id": "8a8aaccd-1fa2-46d5-8948-8347c4ef4463", "is_component": false, "last_tested_version": "1.6.3", - "name": "Lab Value Extraction Agent", + "name": "Lab Values Extraction", "tags": [ "chart-review" ] diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Prior Auth Form Extraction Agent.json b/src/backend/base/langflow/initial_setup/starter_projects/Prior Auth Form Extraction Agent.json index 0ee5e277c4ac..8e91a5918bf6 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Prior Auth Form Extraction Agent.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Prior Auth Form Extraction Agent.json @@ -6,185 +6,280 @@ "className": "", "data": { "sourceHandle": { - "dataType": "BlobStorage", - "id": "BlobStorage-OZSTh", - "name": "file_path", + "dataType": "Prompt", + "id": "Prompt-6MI3E", + "name": "prompt", "output_types": [ - "Data" + "Message" ] }, "targetHandle": { - "fieldName": "file_path", - "id": "split_into_images-mWZ3Q", + "fieldName": "search_query", + "id": "KnowledgeHubSearch-uubn9", "inputTypes": [ - "Data", "Message" ], - "type": "other" + "type": "str" } }, - "id": "xy-edge__BlobStorage-OZSTh{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-OZSThœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}-split_into_images-mWZ3Q{œfieldNameœ:œfile_pathœ,œidœ:œsplit_into_images-mWZ3Qœ,œinputTypesœ:[œDataœ,œMessageœ],œtypeœ:œotherœ}", - "source": "BlobStorage-OZSTh", - "sourceHandle": "{œdataTypeœ: œBlobStorageœ, œidœ: œBlobStorage-OZSThœ, œnameœ: œfile_pathœ, œoutput_typesœ: [œDataœ]}", - "target": "split_into_images-mWZ3Q", - "targetHandle": "{œfieldNameœ: œfile_pathœ, œidœ: œsplit_into_images-mWZ3Qœ, œinputTypesœ: [œDataœ, œMessageœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-Prompt-6MI3E{œdataTypeœ:œPromptœ,œidœ:œPrompt-6MI3Eœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-KnowledgeHubSearch-uubn9{œfieldNameœ:œsearch_queryœ,œidœ:œKnowledgeHubSearch-uubn9œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "Prompt-6MI3E", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-6MI3Eœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", + "target": "KnowledgeHubSearch-uubn9", + "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œKnowledgeHubSearch-uubn9œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "split_into_images", - "id": "split_into_images-mWZ3Q", - "name": "image_urls", + "dataType": "KnowledgeHubSearch", + "id": "KnowledgeHubSearch-uubn9", + "name": "query_results", "output_types": [ "Data" ] }, "targetHandle": { - "fieldName": "file_path", - "id": "srf-identification-ZaSBG", + "fieldName": "input_data", + "id": "ParserComponent-VGmQj", "inputTypes": [ - "Data", - "Message" + "DataFrame", + "Data" ], "type": "other" } }, - "id": "xy-edge__split_into_images-mWZ3Q{œdataTypeœ:œsplit_into_imagesœ,œidœ:œsplit_into_images-mWZ3Qœ,œnameœ:œimage_urlsœ,œoutput_typesœ:[œDataœ]}-srf-identification-ZaSBG{œfieldNameœ:œfile_pathœ,œidœ:œsrf-identification-ZaSBGœ,œinputTypesœ:[œDataœ,œMessageœ],œtypeœ:œotherœ}", - "source": "split_into_images-mWZ3Q", - "sourceHandle": "{œdataTypeœ: œsplit_into_imagesœ, œidœ: œsplit_into_images-mWZ3Qœ, œnameœ: œimage_urlsœ, œoutput_typesœ: [œDataœ]}", - "target": "srf-identification-ZaSBG", - "targetHandle": "{œfieldNameœ: œfile_pathœ, œidœ: œsrf-identification-ZaSBGœ, œinputTypesœ: [œDataœ, œMessageœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-KnowledgeHubSearch-uubn9{œdataTypeœ:œKnowledgeHubSearchœ,œidœ:œKnowledgeHubSearch-uubn9œ,œnameœ:œquery_resultsœ,œoutput_typesœ:[œDataœ]}-ParserComponent-VGmQj{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-VGmQjœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "selected": false, + "source": "KnowledgeHubSearch-uubn9", + "sourceHandle": "{œdataTypeœ: œKnowledgeHubSearchœ, œidœ: œKnowledgeHubSearch-uubn9œ, œnameœ: œquery_resultsœ, œoutput_typesœ: [œDataœ]}", + "target": "ParserComponent-VGmQj", + "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œParserComponent-VGmQjœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "srf-identification", - "id": "srf-identification-ZaSBG", - "name": "srfIdentification", + "dataType": "ParserComponent", + "id": "ParserComponent-VGmQj", + "name": "parsed_text", "output_types": [ - "Data" + "Message" ] }, "targetHandle": { - "fieldName": "input_array", - "id": "json_array_filter-8satn", + "fieldName": "context", + "id": "Prompt-L80eY", "inputTypes": [ - "Data" + "Message" ], - "type": "other" + "type": "str" } }, - "id": "xy-edge__srf-identification-ZaSBG{œdataTypeœ:œsrf-identificationœ,œidœ:œsrf-identification-ZaSBGœ,œnameœ:œsrfIdentificationœ,œoutput_typesœ:[œDataœ]}-json_array_filter-8satn{œfieldNameœ:œinput_arrayœ,œidœ:œjson_array_filter-8satnœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "srf-identification-ZaSBG", - "sourceHandle": "{œdataTypeœ: œsrf-identificationœ, œidœ: œsrf-identification-ZaSBGœ, œnameœ: œsrfIdentificationœ, œoutput_typesœ: [œDataœ]}", - "target": "json_array_filter-8satn", - "targetHandle": "{œfieldNameœ: œinput_arrayœ, œidœ: œjson_array_filter-8satnœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-ParserComponent-VGmQj{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-VGmQjœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-L80eY{œfieldNameœ:œcontextœ,œidœ:œPrompt-L80eYœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "ParserComponent-VGmQj", + "sourceHandle": "{œdataTypeœ: œParserComponentœ, œidœ: œParserComponent-VGmQjœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-L80eY", + "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-L80eYœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "srf-extraction", - "id": "srf-extraction-S3eHF", - "name": "srfExtraction", + "dataType": "Prompt", + "id": "Prompt-6MI3E", + "name": "prompt", "output_types": [ - "Data" + "Message" ] }, "targetHandle": { - "fieldName": "data", - "id": "JSONOutput-Kpx5X", + "fieldName": "question", + "id": "Prompt-L80eY", "inputTypes": [ - "Data" + "Message" ], - "type": "other" + "type": "str" } }, - "id": "xy-edge__srf-extraction-S3eHF{œdataTypeœ:œsrf-extractionœ,œidœ:œsrf-extraction-S3eHFœ,œnameœ:œsrfExtractionœ,œoutput_typesœ:[œDataœ]}-JSONOutput-Kpx5X{œfieldNameœ:œdataœ,œidœ:œJSONOutput-Kpx5Xœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "srf-extraction-S3eHF", - "sourceHandle": "{œdataTypeœ: œsrf-extractionœ, œidœ: œsrf-extraction-S3eHFœ, œnameœ: œsrfExtractionœ, œoutput_typesœ: [œDataœ]}", - "target": "JSONOutput-Kpx5X", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œJSONOutput-Kpx5Xœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-Prompt-6MI3E{œdataTypeœ:œPromptœ,œidœ:œPrompt-6MI3Eœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-Prompt-L80eY{œfieldNameœ:œquestionœ,œidœ:œPrompt-L80eYœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "Prompt-6MI3E", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-6MI3Eœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-L80eY", + "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-L80eYœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "json_array_filter", - "id": "json_array_filter-8satn", - "name": "filtered_array", + "dataType": "Prompt", + "id": "Prompt-L80eY", + "name": "prompt", "output_types": [ - "Data" + "Message" ] }, "targetHandle": { - "fieldName": "file_path", - "id": "srf-extraction-S3eHF", + "fieldName": "input_value", + "id": "AzureOpenAIModel-qewCW", "inputTypes": [ - "Data", "Message" ], - "type": "other" + "type": "str" + } + }, + "id": "reactflow__edge-Prompt-L80eY{œdataTypeœ:œPromptœ,œidœ:œPrompt-L80eYœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-AzureOpenAIModel-qewCW{œfieldNameœ:œinput_valueœ,œidœ:œAzureOpenAIModel-qewCWœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "Prompt-L80eY", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-L80eYœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", + "target": "AzureOpenAIModel-qewCW", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œAzureOpenAIModel-qewCWœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "AzureOpenAIModel", + "id": "AzureOpenAIModel-qewCW", + "name": "text_output", + "output_types": [ + "Message" + ] + }, + "targetHandle": { + "fieldName": "input_value", + "id": "TextOutput-rUiuT", + "inputTypes": [ + "Message" + ], + "type": "str" + } + }, + "id": "reactflow__edge-AzureOpenAIModel-qewCW{œdataTypeœ:œAzureOpenAIModelœ,œidœ:œAzureOpenAIModel-qewCWœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-TextOutput-rUiuT{œfieldNameœ:œinput_valueœ,œidœ:œTextOutput-rUiuTœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "AzureOpenAIModel-qewCW", + "sourceHandle": "{œdataTypeœ: œAzureOpenAIModelœ, œidœ: œAzureOpenAIModel-qewCWœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", + "target": "TextOutput-rUiuT", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œTextOutput-rUiuTœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "TextInput", + "id": "TextInput-gMfph", + "name": "text", + "output_types": [ + "Message" + ] + }, + "targetHandle": { + "fieldName": "procedure_description", + "id": "Prompt-6MI3E", + "inputTypes": [ + "Message" + ], + "type": "str" + } + }, + "id": "xy-edge__TextInput-gMfph{œdataTypeœ:œTextInputœ,œidœ:œTextInput-gMfphœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-6MI3E{œfieldNameœ:œprocedure_descriptionœ,œidœ:œPrompt-6MI3Eœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "TextInput-gMfph", + "sourceHandle": "{œdataTypeœ: œTextInputœ, œidœ: œTextInput-gMfphœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-6MI3E", + "targetHandle": "{œfieldNameœ: œprocedure_descriptionœ, œidœ: œPrompt-6MI3Eœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + }, + { + "animated": false, + "className": "", + "data": { + "sourceHandle": { + "dataType": "TextInput", + "id": "TextInput-vut4E", + "name": "text", + "output_types": [ + "Message" + ] + }, + "targetHandle": { + "fieldName": "diagnosis_description", + "id": "Prompt-6MI3E", + "inputTypes": [ + "Message" + ], + "type": "str" } }, - "id": "xy-edge__json_array_filter-8satn{œdataTypeœ:œjson_array_filterœ,œidœ:œjson_array_filter-8satnœ,œnameœ:œfiltered_arrayœ,œoutput_typesœ:[œDataœ]}-srf-extraction-S3eHF{œfieldNameœ:œfile_pathœ,œidœ:œsrf-extraction-S3eHFœ,œinputTypesœ:[œDataœ,œMessageœ],œtypeœ:œotherœ}", - "source": "json_array_filter-8satn", - "sourceHandle": "{œdataTypeœ: œjson_array_filterœ, œidœ: œjson_array_filter-8satnœ, œnameœ: œfiltered_arrayœ, œoutput_typesœ: [œDataœ]}", - "target": "srf-extraction-S3eHF", - "targetHandle": "{œfieldNameœ: œfile_pathœ, œidœ: œsrf-extraction-S3eHFœ, œinputTypesœ: [œDataœ, œMessageœ], œtypeœ: œotherœ}" + "id": "xy-edge__TextInput-vut4E{œdataTypeœ:œTextInputœ,œidœ:œTextInput-vut4Eœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-Prompt-6MI3E{œfieldNameœ:œdiagnosis_descriptionœ,œidœ:œPrompt-6MI3Eœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "TextInput-vut4E", + "sourceHandle": "{œdataTypeœ: œTextInputœ, œidœ: œTextInput-vut4Eœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-6MI3E", + "targetHandle": "{œfieldNameœ: œdiagnosis_descriptionœ, œidœ: œPrompt-6MI3Eœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" } ], "nodes": [ { "data": { - "id": "BlobStorage-OZSTh", + "id": "Prompt-6MI3E", "node": { "base_classes": [ - "Data" + "Message" ], "beta": false, "conditional_paths": [], - "custom_fields": {}, - "description": "Load files from Azure Blob Storage", - "display_name": "Blob Storage", - "documentation": "http://docs.langflow.org/components/storage", + "custom_fields": { + "template": [ + "procedure_description", + "diagnosis_description" + ] + }, + "description": "Create a prompt template with dynamic variables.", + "display_name": "Prompt", + "documentation": "", "edited": false, + "error": null, "field_order": [ - "storage_account", - "container_name", - "file_name", - "return_all_files" + "template", + "tool_placeholder" ], "frozen": false, - "icon": "Autonomize", + "full_path": null, + "icon": "prompts", + "is_composition": null, + "is_input": null, + "is_output": null, "legacy": false, - "lf_version": "1.4.3", "metadata": {}, "minimized": false, + "name": "", "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "File Path", - "group_outputs": false, - "method": "get_file_paths", - "name": "file_path", - "selected": "Data", + "display_name": "Prompt Message", + "method": "build_prompt", + "name": "prompt", + "selected": "Message", "tool_mode": true, "types": [ - "Data" + "Message" ], "value": "__UNDEFINED__" } ], "pinned": false, + "priority": null, "template": { "_type": "Component", "code": { @@ -203,85 +298,89 @@ "show": true, "title_case": false, "type": "code", - "value": "\"\"\"Blob Storage Component for loading files from Azure Blob Storage.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.services.deps import get_flexstore_service\nfrom loguru import logger\n\n\nclass BlobStorageComponent(Component):\n display_name = \"Blob Storage\"\n category: str = \"input_output\"\n description = \"Load files from Azure Blob Storage\"\n documentation = \"http://docs.langflow.org/components/storage\"\n icon = \"Autonomize\"\n name = \"BlobStorage\"\n\n # Match the property name expected by FileComponent\n FILE_PATH_FIELD = \"file_path\"\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._container_list: list[str] = []\n self._file_list: list[str] = []\n\n inputs = [\n StrInput(\n name=\"storage_account\",\n display_name=\"Storage Account\",\n required=False,\n info=\"Storage Account name\",\n advanced=True,\n ),\n DropdownInput(\n name=\"container_name\",\n display_name=\"Container\",\n info=\"Select a container from the storage account\",\n required=True,\n refresh_button=True,\n ),\n DropdownInput(\n name=\"file_name\",\n display_name=\"File\",\n info=\"Select a file from the container\",\n required=True,\n refresh_button=True,\n ),\n BoolInput(\n name=\"return_all_files\",\n display_name=\"Return All Files\",\n info=\"If true and no specific file is selected, returns all files in the container\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"file_path\", # Match the property name expected by FileComponent\n display_name=\"File Path\",\n method=\"get_file_paths\",\n ),\n ]\n\n async def update_build_config(\n self, build_config: dict, field_value: Any, field_name: str | None = None\n ):\n \"\"\"Update the build configuration based on field changes.\"\"\"\n logger.info(f\"update_build_config called with field_name: {field_name}\")\n\n storage_account = getattr(self, \"storage_account\", None)\n container_name = getattr(self, \"container_name\", None)\n\n if field_name == \"container_name\":\n try:\n # Load the container options when the field is refreshed\n service = get_flexstore_service()\n self._container_list = await service.get_containers(storage_account)\n\n build_config[\"container_name\"][\"options\"] = self._container_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating container list: {e!s}\")\n raise\n\n elif field_name == \"file_name\" and container_name:\n try:\n # Load the file options when the field is refreshed\n service = get_flexstore_service()\n self._file_list = await service.get_files(\n storage_account, container_name\n )\n\n build_config[\"file_name\"][\"options\"] = self._file_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating file list: {e!s}\")\n raise\n\n return build_config\n\n async def get_file_paths(self) -> list[Data]:\n \"\"\"Get file paths for the FileComponent to process.\"\"\"\n try:\n if not self.container_name:\n logger.warning(\"Container name is required.\")\n return []\n\n service = get_flexstore_service()\n file_paths = []\n\n # If a specific file is selected\n if self.file_name:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, self.file_name\n )\n if signed_url:\n file_paths = [Data(data={self.FILE_PATH_FIELD: signed_url})]\n # If no specific file is selected and return_all_files is True\n elif self.return_all_files:\n files = await service.get_files(\n self.storage_account, self.container_name\n )\n for file in files:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, file\n )\n if signed_url:\n file_paths.append(Data(data={self.FILE_PATH_FIELD: signed_url}))\n\n if file_paths:\n self.status = file_paths\n logger.info(f\"Generated {len(file_paths)} file paths\")\n for path in file_paths:\n logger.debug(f\"File path: {path.data.get(self.FILE_PATH_FIELD)}\")\n else:\n logger.warning(\"No file paths generated\")\n\n return file_paths\n\n except Exception as e:\n logger.error(f\"Error in get_file_paths: {e!s}\")\n return []\n" + "value": "from langflow.base.prompts.api_utils import process_prompt_template\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import DefaultPromptField\nfrom langflow.io import MessageTextInput, Output, PromptInput\nfrom langflow.schema.message import Message\nfrom langflow.template.utils import update_template_values\n\n\nclass PromptComponent(Component):\n display_name: str = \"Prompt\"\n description: str = \"Create a prompt template with dynamic variables.\"\n icon = \"prompts\"\n trace_type = \"prompt\"\n name = \"Prompt\"\n\n inputs = [\n PromptInput(name=\"template\", display_name=\"Template\"),\n MessageTextInput(\n name=\"tool_placeholder\",\n display_name=\"Tool Placeholder\",\n tool_mode=True,\n advanced=True,\n info=\"A placeholder input for tool mode.\",\n ),\n ]\n\n outputs = [\n Output(display_name=\"Prompt Message\", name=\"prompt\", method=\"build_prompt\"),\n ]\n\n async def build_prompt(self) -> Message:\n prompt = Message.from_template(**self._attributes)\n self.status = prompt.text\n return prompt\n\n def _update_template(self, frontend_node: dict):\n prompt_template = frontend_node[\"template\"][\"template\"][\"value\"]\n custom_fields = frontend_node[\"custom_fields\"]\n frontend_node_template = frontend_node[\"template\"]\n _ = process_prompt_template(\n template=prompt_template,\n name=\"template\",\n custom_fields=custom_fields,\n frontend_node_template=frontend_node_template,\n )\n return frontend_node\n\n async def update_frontend_node(self, new_frontend_node: dict, current_frontend_node: dict):\n \"\"\"This function is called after the code validation is done.\"\"\"\n frontend_node = await super().update_frontend_node(new_frontend_node, current_frontend_node)\n template = frontend_node[\"template\"][\"template\"][\"value\"]\n # Kept it duplicated for backwards compatibility\n _ = process_prompt_template(\n template=template,\n name=\"template\",\n custom_fields=frontend_node[\"custom_fields\"],\n frontend_node_template=frontend_node[\"template\"],\n )\n # Now that template is updated, we need to grab any values that were set in the current_frontend_node\n # and update the frontend_node with those values\n update_template_values(new_template=frontend_node, previous_template=current_frontend_node[\"template\"])\n return frontend_node\n\n def _get_fallback_input(self, **kwargs):\n return DefaultPromptField(**kwargs)\n" }, - "container_name": { - "_input_type": "DropdownInput", + "diagnosis_description": { "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Container", + "display_name": "diagnosis_description", "dynamic": false, - "info": "Select a container from the storage account", - "name": "container_name", - "options": [], - "options_metadata": [], + "field_type": "str", + "fileTypes": [], + "file_path": "", + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "multiline": true, + "name": "diagnosis_description", "placeholder": "", - "refresh_button": true, - "required": true, + "required": false, "show": true, "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, "type": "str", - "value": "genesis-container" + "value": "" }, - "file_name": { - "_input_type": "DropdownInput", + "procedure_description": { "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "File", + "display_name": "procedure_description", "dynamic": false, - "info": "Select a file from the container", - "name": "file_name", - "options": [], - "options_metadata": [], + "field_type": "str", + "fileTypes": [], + "file_path": "", + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "multiline": true, + "name": "procedure_description", "placeholder": "", - "refresh_button": true, - "required": true, + "required": false, "show": true, "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, "type": "str", - "value": "PriorAuthSample1 (1).pdf" + "value": "" }, - "return_all_files": { - "_input_type": "BoolInput", + "template": { + "_input_type": "PromptInput", "advanced": false, - "display_name": "Return All Files", + "display_name": "Template", "dynamic": false, - "info": "If true and no specific file is selected, returns all files in the container", + "info": "", "list": false, "list_add_label": "Add More", - "name": "return_all_files", + "name": "template", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true + "trace_as_input": true, + "type": "prompt", + "value": "{procedure_description} is considered medically necessary for {diagnosis_description} under which scenarios/indications?" }, - "storage_account": { - "_input_type": "StrInput", + "tool_placeholder": { + "_input_type": "MessageTextInput", "advanced": true, - "display_name": "Storage Account", + "display_name": "Tool Placeholder", "dynamic": false, - "info": "Storage Account name", + "info": "A placeholder input for tool mode.", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "storage_account", + "name": "tool_placeholder", "placeholder": "", "required": false, "show": true, "title_case": false, - "tool_mode": false, + "tool_mode": true, + "trace_as_input": true, "trace_as_metadata": true, "type": "str", "value": "" @@ -290,24 +389,25 @@ "tool_mode": false }, "showNode": true, - "type": "BlobStorage" + "type": "Prompt" }, - "dragging": false, - "id": "BlobStorage-OZSTh", + "id": "Prompt-6MI3E", "measured": { - "height": 367, + "height": 419, "width": 320 }, "position": { - "x": 168, - "y": 158 + "x": 1389.6582632910818, + "y": 486.28497918874655 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "split_into_images-mWZ3Q", + "description": "This component is used to search for information in the knowledge hub.", + "display_name": "Knowledge Hub Search", + "id": "KnowledgeHubSearch-uubn9", "node": { "base_classes": [ "Data" @@ -315,24 +415,18 @@ "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Split PDFs and TIFFs into individual images", - "display_name": "Split Into Images", + "description": "This component is used to search for information in the knowledge hub.", + "display_name": "Knowledge Hub Search", "documentation": "http://docs.langflow.org/components/custom", "edited": false, "field_order": [ - "file_path", - "silent_errors", - "delete_server_file_after_processing", - "ignore_unsupported_extensions", - "ignore_unspecified_files", - "storage_account", - "temp_container", - "keep_original_size" + "search_query", + "selected_hubs" ], "frozen": false, - "icon": "FileType", + "icon": "Autonomize", + "last_updated": "2025-10-29T10:51:08.051Z", "legacy": false, - "lf_version": "1.4.3", "metadata": {}, "minimized": false, "output_types": [], @@ -340,10 +434,10 @@ { "allows_loop": false, "cache": true, - "display_name": "Image URLs", + "display_name": "Query Results", "group_outputs": false, - "method": "get_image_urls", - "name": "image_urls", + "method": "build_output", + "name": "query_results", "selected": "Data", "tool_mode": true, "types": [ @@ -371,201 +465,101 @@ "show": true, "title_case": false, "type": "code", - "value": "\"\"\"Split Into Images Component - Splits PDFs and TIFFs into individual images.\"\"\"\n\nimport io\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom urllib.parse import urlparse\nfrom typing import Any\n\nimport aiohttp\nimport fitz\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.base.data.utils import IMG_FILE_TYPES\nfrom langflow.io import BoolInput, HandleInput, Output, StrInput\nfrom langflow.schema import Data\nfrom loguru import logger\nfrom PIL import Image\n\nfrom langflow.services.deps import get_flexstore_service\nfrom langflow.services.flexstore.settings import FlexStoreSettings\n\nflexstore_settings = FlexStoreSettings()\n\n\nclass SplitIntoImagesComponent(BaseFileComponent):\n \"\"\"Component for splitting PDFs/TIFFs into individual images and uploading to blob storage.\"\"\"\n\n display_name = \"Split Into Images\"\n description = \"Split PDFs and TIFFs into individual images\"\n documentation = \"http://docs.langflow.org/components/custom\"\n icon = \"FileType\"\n name = \"split_into_images\"\n\n VALID_EXTENSIONS = [\"pdf\", \"tiff\", \"tif\"]\n\n inputs = [\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n HandleInput(\n name=\"file_path\",\n display_name=\"URL\",\n info=(\n \"Upload file via URL or local server path. Supports: \\n\"\n \"1. Direct HTTP/HTTPS URLs for remote files\\n\"\n \"2. Local server file paths\\n\"\n \"3. Data objects with file path property\\n\"\n \"4. Message objects containing file paths\\n\"\n \"\\nSupports the same file types as the Path input. \"\n \"Takes precedence over Path input when both are provided.\"\n ),\n required=False,\n input_types=[\"Data\", \"Message\"],\n is_list=True,\n ),\n StrInput(\n name=\"storage_account\",\n display_name=\"Storage Account\",\n required=False,\n info=\"Storage Account name\",\n advanced=True,\n ),\n StrInput(\n name=\"temp_container\",\n display_name=\"Temporary Container\",\n required=False,\n info=\"Temporary container name for storing split images\",\n advanced=True,\n ),\n BoolInput(\n name=\"keep_original_size\",\n display_name=\"Keep Original Size\",\n value=True,\n info=\"Keep the original image size when splitting\",\n ),\n ]\n\n outputs = [\n Output(name=\"image_urls\", display_name=\"Image URLs\", method=\"get_image_urls\")\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n\n async def _validate_and_resolve_paths_async(\n self,\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths asynchronously.\"\"\"\n resolved_files = []\n file_path = self._file_path_as_list()\n\n for obj in file_path:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Check if it's a URL\n if isinstance(server_file_path, str) and server_file_path.startswith(\n (\"http://\", \"https://\")\n ):\n local_path = await self._download_file_from_url(server_file_path)\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": server_file_path,\n }\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=True,\n silent_errors=self.silent_errors,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(server_file_path)))\n if not resolved_path.exists():\n msg = f\"File not found: {server_file_path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n silent_errors=self.silent_errors,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {server_file_path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n return resolved_files\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n filename = os.path.basename(urlparse(url).path)\n if not filename:\n filename = \"downloaded_file.pdf\"\n\n local_path = os.path.join(self.temp_dir, filename)\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n async def _split_pdf_to_images(self, file_path: str) -> list[bytes]:\n \"\"\"Split PDF into individual page images.\"\"\"\n try:\n image_bytes_list = []\n\n # Open PDF\n pdf_document = fitz.open(file_path)\n\n for page_num in range(pdf_document.page_count):\n page = pdf_document[page_num]\n\n # Get page as an image\n pix = page.get_pixmap()\n img_data = pix.tobytes(\"png\")\n\n if not self.keep_original_size:\n # Resize if needed using PIL\n img = Image.open(io.BytesIO(img_data))\n max_size = (800, 800)\n img.thumbnail(max_size, Image.Resampling.LANCZOS)\n\n # Convert back to bytes\n img_byte_arr = io.BytesIO()\n img.save(img_byte_arr, format=\"PNG\")\n img_data = img_byte_arr.getvalue()\n\n image_bytes_list.append(img_data)\n\n pdf_document.close()\n return image_bytes_list\n\n except Exception as e:\n logger.error(f\"Error splitting PDF: {e!s}\")\n if not self.silent_errors:\n raise\n return []\n\n async def _split_tiff_to_images(self, file_path: str) -> list[bytes]:\n \"\"\"Split TIFF into individual images.\"\"\"\n try:\n with Image.open(file_path) as img:\n image_bytes_list = []\n\n for i in range(img.n_frames):\n img.seek(i)\n frame = img.copy()\n\n if not self.keep_original_size:\n # Resize if needed\n max_size = (800, 800) # Example max size\n frame.thumbnail(max_size, Image.Resampling.LANCZOS)\n\n img_byte_arr = io.BytesIO()\n frame.save(img_byte_arr, format=\"PNG\")\n image_bytes_list.append(img_byte_arr.getvalue())\n\n return image_bytes_list\n\n except Exception as e:\n logger.error(f\"Error splitting TIFF: {e!s}\")\n if not self.silent_errors:\n raise\n return []\n\n async def _upload_image_to_blob(\n self, image_bytes: bytes, filename: str\n ) -> str | None:\n \"\"\"Upload an image to blob storage and get its signed URL.\"\"\"\n try:\n service = get_flexstore_service()\n\n # Get upload URL\n upload_url = await service.get_signed_url_upload(\n storage_account=self.storage_account\n or flexstore_settings.DEFAULT_TEMPORARY_STORAGE_ACCOUNT,\n container_name=self.temp_container\n or flexstore_settings.DEFAULT_TEMPORARY_STORAGE_CONTAINER,\n file_name=filename,\n )\n\n if not upload_url:\n raise ValueError(\"Failed to get upload URL\")\n\n headers = {\n \"x-ms-blob-type\": \"BlockBlob\", # Required header for Azure Blob Storage\n \"Content-Type\": \"image/png\", # Since we're saving as PNG\n }\n\n # Upload the image\n async with aiohttp.ClientSession() as session:\n async with session.put(\n upload_url,\n data=image_bytes,\n headers=headers,\n ) as response:\n response.raise_for_status()\n\n # Get read URL\n read_url = await service.get_signed_url(\n storage_account=self.storage_account\n or flexstore_settings.DEFAULT_TEMPORARY_STORAGE_ACCOUNT,\n container_name=self.temp_container\n or flexstore_settings.DEFAULT_TEMPORARY_STORAGE_CONTAINER,\n file_name=filename,\n )\n\n return read_url\n\n except Exception as e:\n logger.error(f\"Error uploading image: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process the files as required by BaseFileComponent\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n if not self.silent_errors:\n raise ValueError(msg)\n logger.warning(msg)\n return file_list\n\n async def _process_files_for_images(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[Data]:\n \"\"\"Internal method to process files and generate image URLs.\"\"\"\n processed_files = []\n\n for file in file_list:\n try:\n # Split file into images based on type\n ext = file.path.suffix.lower()\n if ext == \".pdf\":\n images = await self._split_pdf_to_images(str(file.path))\n elif ext in [\".tiff\", \".tif\"]:\n images = await self._split_tiff_to_images(str(file.path))\n else:\n continue\n\n # Upload each image and get URLs\n image_urls = []\n for i, image_bytes in enumerate(images):\n filename = f\"{file.path.stem}_page_{i + 1}.png\"\n url = await self._upload_image_to_blob(image_bytes, filename)\n if url:\n image_urls.append(url)\n\n # Create Data object with image URLs\n if image_urls:\n data = Data(data={\"file_path\": image_urls})\n processed_files.append(data)\n\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n return processed_files\n\n async def get_image_urls(self) -> list[Data]:\n \"\"\"Output method that processes files and returns image URLs.\"\"\"\n try:\n # Use async validation that handles URLs\n files = await self._validate_and_resolve_paths_async()\n if not files:\n msg = \"No valid files provided\"\n if not self.silent_errors:\n raise ValueError(msg)\n return []\n\n # Process files and get image URLs\n return await self._process_files_for_images(files)\n\n except Exception as e:\n logger.error(f\"Error processing images: {e!s}\")\n if not self.silent_errors:\n return []\n raise\n\n def __del__(self):\n \"\"\"Clean up temporary files.\"\"\"\n try:\n if hasattr(self, \"temp_dir\") and os.path.exists(self.temp_dir):\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.remove(file_path)\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" - }, - "delete_server_file_after_processing": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Delete Server File After Processing", - "dynamic": false, - "info": "If true, the Server File Path will be deleted after processing.", - "list": false, - "list_add_label": "Add More", - "name": "delete_server_file_after_processing", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true + "value": "from __future__ import annotations\n\nfrom typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.services.deps import get_knowledge_service\nfrom langflow.io import DropdownInput, IntInput, MultilineInput, MultiselectInput, Output\nfrom langflow.schema import Data\nfrom loguru import logger\n\n# Import the service and factory directly\nfrom langflow.services.knowledge.factory import KnowledgeServiceFactory\nfrom langflow.services.knowledge.service import KnowledgeService\n\n\nclass KnowledgeHubSearchComponent(Component):\n display_name = \"Knowledge Hub Search\"\n description = (\n \"This component is used to search for information in the knowledge hub.\"\n )\n documentation: str = \"http://docs.langflow.org/components/custom\"\n icon = \"Autonomize\"\n name = \"KnowledgeHubSearch\"\n\n def __init__(self, **kwargs):\n self._hub_data: list[dict[str, str]] = []\n self._selected_hub_names: list[str] = [] # Track selected hub names\n self._knowledge_service: KnowledgeService | None = None # Cache the service instance\n super().__init__(**kwargs)\n\n def _get_knowledge_service(self) -> KnowledgeService:\n \"\"\"Get or create the knowledge service instance.\"\"\"\n if self._knowledge_service is None:\n factory = KnowledgeServiceFactory()\n self._knowledge_service = factory.create()\n return self._knowledge_service\n\n async def update_build_config(\n self, build_config: dict, field_value: Any, field_name: str | None = None\n ):\n \"\"\"Update the build configuration based on field changes.\"\"\"\n logger.info(f\"update_build_config called with field_name: {field_name}\")\n\n if field_name == \"selected_hubs\":\n try:\n # Get the knowledge service directly\n service = self._get_knowledge_service()\n if not service.ready:\n logger.error(\"KnowledgeHub service is not ready\")\n return build_config\n \n self._hub_data = await service.get_knowledge_hubs()\n\n # Debug the raw response\n logger.info(f\"Raw hub data: {self._hub_data}\")\n\n options = [hub[\"name\"] for hub in self._hub_data]\n logger.info(f\"Extracted hub options: {options}\")\n\n # Debug the build_config before update\n logger.info(\n f\"Build config before update: {build_config.get('selected_hubs', {})}\"\n )\n\n build_config[\"selected_hubs\"][\"options\"] = options\n\n # Store selected hub names for validation during build\n if field_value and isinstance(field_value, list):\n self._selected_hub_names = field_value\n logger.info(f\"Stored selected hub names: {self._selected_hub_names}\")\n\n # Debug the build_config after update\n logger.info(\n f\"Build config after update: {build_config.get('selected_hubs', {})}\"\n )\n\n return build_config\n except Exception as e:\n logger.exception(f\"Error in update_build_config: {e!s}\")\n raise\n return build_config\n\n inputs = [\n MultilineInput(\n name=\"search_query\",\n display_name=\"Search Query\",\n tool_mode=True,\n ),\n MultiselectInput(\n name=\"selected_hubs\",\n display_name=\"Data Sources\",\n value=[],\n refresh_button=True,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Query Results\",\n name=\"query_results\",\n method=\"build_output\",\n ),\n ]\n\n async def _validate_and_refresh_data_sources(self) -> tuple[bool, list[str]]:\n \"\"\"Validate that the selected data sources are still available, if not fetch and update them\"\"\"\n if not self._selected_hub_names:\n logger.info(\"No data sources selected, validation skipped\")\n return True, []\n \n try:\n # Get the knowledge service directly\n service = self._get_knowledge_service()\n if not service.ready:\n logger.error(\"KnowledgeHub service is not ready for validation\")\n return True, self._selected_hub_names # Return original selection if service not ready\n \n fresh_hub_data = await service.get_knowledge_hubs()\n available_names = [hub[\"name\"] for hub in fresh_hub_data]\n \n logger.info(f\"Available data sources: {available_names}\")\n logger.info(f\"Selected data sources: {self._selected_hub_names}\")\n \n # Check which selected hubs are still available\n still_available = []\n missing_hubs = []\n refreshed_hubs = []\n \n for selected_name in self._selected_hub_names:\n if selected_name in available_names:\n still_available.append(selected_name)\n logger.info(f\"Data source '{selected_name}' is still available\")\n else:\n missing_hubs.append(selected_name)\n logger.warning(f\"Data source '{selected_name}' is no longer available\")\n \n # Try to find missing hubs in fresh data (in case of name changes or refresh issues)\n for missing_name in missing_hubs:\n # Look for exact match first\n found_hub = next(\n (hub for hub in fresh_hub_data if hub[\"name\"] == missing_name), None\n )\n \n if found_hub:\n still_available.append(found_hub[\"name\"])\n refreshed_hubs.append(found_hub[\"name\"])\n logger.info(f\"Refreshed data source '{missing_name}' found and re-added\")\n else:\n # Try partial match (in case of minor name changes)\n partial_matches = [\n hub[\"name\"] for hub in fresh_hub_data \n if missing_name.lower() in hub[\"name\"].lower() or hub[\"name\"].lower() in missing_name.lower()\n ]\n \n if partial_matches:\n logger.info(f\"Possible matches for missing '{missing_name}': {partial_matches}\")\n # For now, don't auto-select partial matches, just log them\n else:\n logger.error(f\"Data source '{missing_name}' not found even after refresh\")\n \n # Update the hub data cache\n self._hub_data = fresh_hub_data\n \n # Update selected hub names to only include available ones\n self._selected_hub_names = still_available\n \n if refreshed_hubs:\n logger.info(f\"Successfully refreshed data sources: {refreshed_hubs}\")\n \n if missing_hubs and not refreshed_hubs:\n logger.warning(f\"Some data sources are no longer available: {[h for h in missing_hubs if h not in refreshed_hubs]}\")\n return False, still_available\n \n return True, still_available\n \n except Exception as e:\n logger.error(f\"Error validating/refreshing data sources: {e}\")\n logger.exception(\"Full error details:\")\n # If we can't validate, return original selection to avoid breaking the flow\n return True, self._selected_hub_names\n\n async def build_output(self) -> Data:\n \"\"\"Generate the output based on selected knowledge hubs.\"\"\"\n try:\n # Validate and refresh data sources if needed\n if self._selected_hub_names:\n is_valid, validated_hubs = await self._validate_and_refresh_data_sources()\n \n if not is_valid and not validated_hubs:\n error_message = f\"Error: Selected data sources are no longer available. Please select different data sources.\"\n logger.error(error_message)\n return Data(\n text=error_message,\n data={\"error\": error_message, \"query_results\": []}\n )\n \n # Use validated hubs instead of self.selected_hubs\n effective_selected_hubs = validated_hubs\n else:\n effective_selected_hubs = self.selected_hubs if hasattr(self, 'selected_hubs') else []\n\n if not effective_selected_hubs:\n logger.warning(\"No knowledge hubs selected or available.\")\n return Data(value={\"query_results\": []})\n\n # Make sure we have hub data\n if not self._hub_data:\n service = self._get_knowledge_service()\n if not service.ready:\n logger.error(\"KnowledgeHub service is not ready\")\n return Data(value={\"query_results\": []})\n self._hub_data = await service.get_knowledge_hubs()\n\n # Map the selected names to their IDs\n selected_hub_ids = [\n hub[\"id\"] for hub in self._hub_data if hub[\"name\"] in effective_selected_hubs\n ]\n\n logger.info(f\"Using data sources: {effective_selected_hubs}\")\n logger.info(f\"Mapped to hub IDs: {selected_hub_ids}\")\n\n service = self._get_knowledge_service()\n if not service.ready:\n logger.error(\"KnowledgeHub service is not ready\")\n return Data(value={\"query_results\": []})\n \n query_results = await service.query_vector_store(\n knowledge_hub_ids=selected_hub_ids, query=self.search_query\n )\n logger.debug(f\"query_results: {query_results}\")\n \n # Concatenate content from query results\n contents = [\n result.get(\"metadata\", {}).get(\"content\", \"\")\n for result in query_results\n ]\n plain_text = \"\\n\\n=== NEW CHUNK ===\\n\\n\".join(contents)\n\n data = Data(\n text=plain_text,\n data={\n \"result\": query_results,\n \"used_data_sources\": effective_selected_hubs, # Include which sources were actually used\n },\n )\n self.status = data\n return data\n\n except Exception as e:\n logger.error(f\"Error in build_output: {e!s}\")\n return Data(value={\"query_results\": []})\n\n async def __aenter__(self):\n \"\"\"Async context manager entry.\"\"\"\n return self\n\n async def __aexit__(self, exc_type, exc_val, exc_tb):\n \"\"\"Async context manager exit - cleanup resources.\"\"\"\n if self._knowledge_service:\n await self._knowledge_service.cleanup()\n self._knowledge_service = None\n" }, - "file_path": { - "_input_type": "HandleInput", + "search_query": { + "_input_type": "MultilineInput", "advanced": false, - "display_name": "URL", + "copy_field": false, + "display_name": "Search Query", "dynamic": false, - "info": "Upload file via URL or local server path. Supports: \n1. Direct HTTP/HTTPS URLs for remote files\n2. Local server file paths\n3. Data objects with file path property\n4. Message objects containing file paths\n\nSupports the same file types as the Path input. Takes precedence over Path input when both are provided.", + "info": "", "input_types": [ - "Data", "Message" ], - "list": true, - "list_add_label": "Add More", - "name": "file_path", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "other", - "value": "" - }, - "ignore_unspecified_files": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Ignore Unspecified Files", - "dynamic": false, - "info": "If true, Data with no 'file_path' property will be ignored.", - "list": false, - "list_add_label": "Add More", - "name": "ignore_unspecified_files", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "ignore_unsupported_extensions": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Ignore Unsupported Extensions", - "dynamic": false, - "info": "If true, files with unsupported extensions will not be processed.", - "list": false, - "list_add_label": "Add More", - "name": "ignore_unsupported_extensions", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "keep_original_size": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Keep Original Size", - "dynamic": false, - "info": "Keep the original image size when splitting", - "list": false, - "list_add_label": "Add More", - "name": "keep_original_size", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "silent_errors": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Silent Errors", - "dynamic": false, - "info": "If true, errors will not raise an exception.", - "list": false, - "list_add_label": "Add More", - "name": "silent_errors", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "storage_account": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Storage Account", - "dynamic": false, - "info": "Storage Account name", "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "storage_account", + "multiline": true, + "name": "search_query", "placeholder": "", "required": false, "show": true, "title_case": false, - "tool_mode": false, + "tool_mode": true, + "trace_as_input": true, "trace_as_metadata": true, "type": "str", "value": "" }, - "temp_container": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Temporary Container", + "selected_hubs": { + "_input_type": "MultiselectInput", + "advanced": false, + "combobox": false, + "display_name": "Data Sources", "dynamic": false, - "info": "Temporary container name for storing split images", - "list": false, + "info": "", + "list": true, "list_add_label": "Add More", "load_from_db": false, - "name": "temp_container", + "name": "selected_hubs", + "options": [], "placeholder": "", + "refresh_button": true, "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "" + "value": [ + "Carelon Guidelines - 2023" + ] } }, "tool_mode": false }, "showNode": true, - "type": "split_into_images" + "type": "KnowledgeHubSearch" }, - "dragging": false, - "id": "split_into_images-mWZ3Q", + "id": "KnowledgeHubSearch-uubn9", "measured": { - "height": 242, + "height": 302, "width": 320 }, "position": { - "x": 596, - "y": 197 + "x": 1961.0146798746339, + "y": 334 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "srf-identification-ZaSBG", + "description": "Extracts text using a template.", + "display_name": "Parser", + "id": "ParserComponent-VGmQj", "node": { "base_classes": [ - "Data" + "Message" ], "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Model for SRF Identification", - "display_name": "SRF Identification", - "documentation": "http://docs.langflow.org/components/custom", + "description": "Extracts text using a template.", + "display_name": "Parser", + "documentation": "https://docs.langflow.org/components-processing#parser", "edited": false, "field_order": [ - "file_path", - "silent_errors", - "delete_server_file_after_processing", - "ignore_unsupported_extensions", - "ignore_unspecified_files" + "input_data", + "mode", + "pattern", + "sep" ], "frozen": false, - "icon": "Autonomize", + "icon": "braces", "legacy": false, - "lf_version": "1.4.3", "metadata": {}, "minimized": false, "output_types": [], @@ -573,14 +567,14 @@ { "allows_loop": false, "cache": true, - "display_name": "SRF Identification", - "method": "process_srf", - "name": "srfIdentification", - "required_inputs": [], - "selected": "Data", + "display_name": "Parsed Text", + "group_outputs": false, + "method": "parse_combined_text", + "name": "parsed_text", + "selected": "Message", "tool_mode": true, "types": [ - "Data" + "Message" ], "value": "__UNDEFINED__" } @@ -604,161 +598,358 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\nimport tempfile\nfrom pathlib import Path\nfrom typing import Any\nfrom urllib.parse import urlparse\n\nimport aiohttp\nfrom loguru import logger\n\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.base.data.utils import IMG_FILE_TYPES\nfrom app.base.modelhub import ATModelComponent\nfrom langflow.io import Output\nfrom langflow.schema import Data\nfrom app.services.modelhub.model_endpoint import ModelEndpoint\n\n\nclass SRFIdentificationComponent(\n ATModelComponent, BaseFileComponent\n): # Changed inheritance order\n display_name = \"SRF Identification\"\n description = \"Model for SRF Identification\"\n documentation: str = \"http://docs.langflow.org/components/custom\"\n icon = \"Autonomize\"\n name = \"srf-identification\"\n _model_name = ModelEndpoint.SRF_IDENTIFICATION\n\n VALID_EXTENSIONS = IMG_FILE_TYPES\n\n inputs = [\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n ]\n\n outputs = [\n Output(\n name=\"srfIdentification\",\n display_name=\"SRF Identification\",\n method=\"process_srf\",\n ),\n ]\n\n # Explicitly define the endpoint\n def __init__(self, **kwargs):\n ATModelComponent.__init__(self, **kwargs) # Initialize ATModelComponent first\n BaseFileComponent.__init__(self, **kwargs) # Then initialize BaseFileComponent\n self._modelhub_service = None\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n self._endpoint = \"https://tolstoy-v2.modelhub.sprint.autonomize.dev/v1/models/identification:predict\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n filename = os.path.basename(urlparse(url).path)\n if not filename:\n filename = \"downloaded_image.png\"\n\n local_path = os.path.join(self.temp_dir, filename)\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n async def _validate_and_resolve_paths_async(\n self,\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths asynchronously.\"\"\"\n resolved_files = []\n file_paths = self._file_path_as_list()\n\n for obj in file_paths:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Handle if server_file_path is a list\n paths_to_process = (\n server_file_path\n if isinstance(server_file_path, list)\n else [server_file_path]\n )\n\n for path in paths_to_process:\n try:\n # Check if it's a URL\n if isinstance(path, str) and path.startswith(\n (\"http://\", \"https://\")\n ):\n local_path = await self._download_file_from_url(path)\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": path,\n }\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=True,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(path)))\n if not resolved_path.exists():\n msg = f\"File not found: {path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n except Exception as e:\n logger.error(f\"Error processing file paths: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n return resolved_files\n\n # async def _read_file(self, file_path: Path) -> bytes:\n # \"\"\"Read file content from a file path\"\"\"\n # try:\n # with open(file_path, \"rb\") as f:\n # return f.read()\n # except Exception as e:\n # logger.error(f\"Error reading file {file_path}: {e!s}\")\n # if not self.silent_errors:\n # raise ValueError(f\"Error reading file: {e!s}\") from e\n # return None\n\n async def extract_srf(self, file_path: Path) -> Any:\n \"\"\"Extract SRF from the image\"\"\"\n try:\n logger.debug(\"Sending request to model endpoint\")\n # response = await self.predict(\n # endpoint=self._endpoint,\n # binary_data=image_data,\n # content_type=\"image/png\"\n # )\n response = await self.predict(\n endpoint=self._endpoint, file_path=file_path, content_type=\"image/png\"\n )\n logger.debug(f\"Model response: {response}\")\n return response\n except Exception as e:\n logger.error(f\"Error in SRF extraction: {e!s}\")\n if not self.silent_errors:\n raise ValueError(f\"Error in SRF extraction: {e!s}\") from e\n return None\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process the files as required by BaseFileComponent\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n if not self.silent_errors:\n raise ValueError(msg)\n logger.warning(msg)\n return file_list\n\n async def process_srf(self) -> Data:\n \"\"\"Process multiple images and generate SRF identification results\"\"\"\n try:\n # Use async validation that handles URLs\n files = await self._validate_and_resolve_paths_async()\n if not files:\n msg = \"No valid files provided\"\n logger.warning(msg)\n if not self.silent_errors:\n raise ValueError(msg)\n return Data(value=[]) # Return empty list instead of error\n\n results = []\n for file in files:\n try:\n # Read the file\n # image_data = await self._read_file(file.path)\n # if not image_data:\n # msg = f\"Could not read file: {file.path}\"\n # logger.warning(msg)\n # continue\n\n # Extract SRF identification from the image\n result = await self.extract_srf(file.path)\n logger.debug(\n f\"Model result: {result}\"\n ) # Debug log to see the result\n\n if result and isinstance(result, dict):\n # Add file information to result with the new format\n original_url = file.data[0].data.get(\n \"original_url\", str(file.path)\n )\n\n # Extract the prediction value from the nested data structure\n # Handle both possible response formats\n if \"data\" in result:\n if (\n isinstance(result[\"data\"], dict)\n and \"data\" in result[\"data\"]\n ):\n prediction = result[\"data\"][\"data\"]\n else:\n prediction = result[\"data\"]\n else:\n prediction = \"unknown\"\n\n results.append(\n {\"file_path\": original_url, \"prediction\": prediction}\n )\n logger.debug(\n f\"Processed result: {results[-1]}\"\n ) # Debug log the processed result\n\n # Handle cleanup\n if file.delete_after_processing and file.path.exists():\n try:\n os.remove(file.path)\n except Exception as e:\n logger.warning(f\"Failed to delete file {file.path}: {e!s}\")\n\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n continue\n logger.warning(\"Continuing to next file due to silent_errors=True\")\n\n if not results:\n msg = \"No successful identifications\"\n logger.warning(msg)\n return Data(value=[]) # Return empty list instead of raising error\n\n # Return array of results directly\n logger.info(f\"Successfully processed {len(results)} files\")\n return Data(value=results)\n\n except Exception as e:\n logger.error(f\"Error processing images: {e!s}\")\n if not self.silent_errors:\n raise ValueError(f\"Error processing images: {e!s}\")\n return Data(value=[]) # Return empty list instead of error\n\n def __del__(self):\n \"\"\"Clean up temporary files.\"\"\"\n if (\n hasattr(self, \"temp_dir\")\n and self.temp_dir\n and os.path.exists(self.temp_dir)\n ):\n try:\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.remove(file_path)\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" + "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/components-processing#parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n formatted_text = self.pattern.format(**data.data)\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n" }, - "delete_server_file_after_processing": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Delete Server File After Processing", + "input_data": { + "_input_type": "HandleInput", + "advanced": false, + "display_name": "Data or DataFrame", "dynamic": false, - "info": "If true, the Server File Path will be deleted after processing.", + "info": "Accepts either a DataFrame or a Data object.", + "input_types": [ + "DataFrame", + "Data" + ], "list": false, "list_add_label": "Add More", - "name": "delete_server_file_after_processing", + "name": "input_data", "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, - "tool_mode": false, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "other", + "value": "" }, - "file_path": { - "_input_type": "HandleInput", + "mode": { + "_input_type": "TabInput", "advanced": false, - "display_name": "Server File Path", + "display_name": "Mode", "dynamic": false, - "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.", + "info": "Convert into raw string instead of using a template.", + "name": "mode", + "options": [ + "Parser", + "Stringify" + ], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "tab", + "value": "Parser" + }, + "pattern": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Template", + "dynamic": true, + "info": "Use variables within curly brackets to extract column values for DataFrames or key values for Data.For example: `Name: {Name}, Age: {Age}, Country: {Country}`", "input_types": [ - "Data", "Message" ], - "list": true, + "list": false, "list_add_label": "Add More", - "name": "file_path", + "load_from_db": false, + "multiline": true, + "name": "pattern", "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, + "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "other", - "value": "" + "type": "str", + "value": "Text: {text}" }, - "ignore_unspecified_files": { - "_input_type": "BoolInput", + "sep": { + "_input_type": "MessageTextInput", "advanced": true, - "display_name": "Ignore Unspecified Files", + "display_name": "Separator", "dynamic": false, - "info": "If true, Data with no 'file_path' property will be ignored.", + "info": "String used to separate rows/items.", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", - "name": "ignore_unspecified_files", + "load_from_db": false, + "name": "sep", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "ignore_unsupported_extensions": { - "_input_type": "BoolInput", + "type": "str", + "value": "\n" + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "ParserComponent" + }, + "id": "ParserComponent-VGmQj", + "measured": { + "height": 329, + "width": 320 + }, + "position": { + "x": 2547.8104395554437, + "y": 591.5804763274202 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "Prompt-L80eY", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": { + "template": [ + "question", + "context" + ] + }, + "description": "Create a prompt template with dynamic variables.", + "display_name": "Prompt", + "documentation": "", + "edited": false, + "error": null, + "field_order": [ + "template", + "tool_placeholder" + ], + "frozen": false, + "full_path": null, + "icon": "prompts", + "is_composition": null, + "is_input": null, + "is_output": null, + "legacy": false, + "metadata": {}, + "minimized": false, + "name": "", + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Prompt Message", + "method": "build_prompt", + "name": "prompt", + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "priority": null, + "template": { + "_type": "Component", + "code": { "advanced": true, - "display_name": "Ignore Unsupported Extensions", + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langflow.base.prompts.api_utils import process_prompt_template\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import DefaultPromptField\nfrom langflow.io import MessageTextInput, Output, PromptInput\nfrom langflow.schema.message import Message\nfrom langflow.template.utils import update_template_values\n\n\nclass PromptComponent(Component):\n display_name: str = \"Prompt\"\n description: str = \"Create a prompt template with dynamic variables.\"\n icon = \"prompts\"\n trace_type = \"prompt\"\n name = \"Prompt\"\n\n inputs = [\n PromptInput(name=\"template\", display_name=\"Template\"),\n MessageTextInput(\n name=\"tool_placeholder\",\n display_name=\"Tool Placeholder\",\n tool_mode=True,\n advanced=True,\n info=\"A placeholder input for tool mode.\",\n ),\n ]\n\n outputs = [\n Output(display_name=\"Prompt Message\", name=\"prompt\", method=\"build_prompt\"),\n ]\n\n async def build_prompt(self) -> Message:\n prompt = Message.from_template(**self._attributes)\n self.status = prompt.text\n return prompt\n\n def _update_template(self, frontend_node: dict):\n prompt_template = frontend_node[\"template\"][\"template\"][\"value\"]\n custom_fields = frontend_node[\"custom_fields\"]\n frontend_node_template = frontend_node[\"template\"]\n _ = process_prompt_template(\n template=prompt_template,\n name=\"template\",\n custom_fields=custom_fields,\n frontend_node_template=frontend_node_template,\n )\n return frontend_node\n\n async def update_frontend_node(self, new_frontend_node: dict, current_frontend_node: dict):\n \"\"\"This function is called after the code validation is done.\"\"\"\n frontend_node = await super().update_frontend_node(new_frontend_node, current_frontend_node)\n template = frontend_node[\"template\"][\"template\"][\"value\"]\n # Kept it duplicated for backwards compatibility\n _ = process_prompt_template(\n template=template,\n name=\"template\",\n custom_fields=frontend_node[\"custom_fields\"],\n frontend_node_template=frontend_node[\"template\"],\n )\n # Now that template is updated, we need to grab any values that were set in the current_frontend_node\n # and update the frontend_node with those values\n update_template_values(new_template=frontend_node, previous_template=current_frontend_node[\"template\"])\n return frontend_node\n\n def _get_fallback_input(self, **kwargs):\n return DefaultPromptField(**kwargs)\n" + }, + "context": { + "advanced": false, + "display_name": "context", + "dynamic": false, + "field_type": "str", + "fileTypes": [], + "file_path": "", + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "multiline": true, + "name": "context", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "" + }, + "question": { + "advanced": false, + "display_name": "question", + "dynamic": false, + "field_type": "str", + "fileTypes": [], + "file_path": "", + "info": "", + "input_types": [ + "Message" + ], + "list": false, + "load_from_db": false, + "multiline": true, + "name": "question", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "" + }, + "template": { + "_input_type": "PromptInput", + "advanced": false, + "display_name": "Template", "dynamic": false, - "info": "If true, files with unsupported extensions will not be processed.", + "info": "", "list": false, "list_add_label": "Add More", - "name": "ignore_unsupported_extensions", + "name": "template", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true + "trace_as_input": true, + "type": "prompt", + "value": "Question:\n{question}\nContext:\n{context}\nClinical Guideline Criteria are as follows:" }, - "silent_errors": { - "_input_type": "BoolInput", + "tool_placeholder": { + "_input_type": "MessageTextInput", "advanced": true, - "display_name": "Silent Errors", + "display_name": "Tool Placeholder", "dynamic": false, - "info": "If true, errors will not raise an exception.", + "info": "A placeholder input for tool mode.", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", - "name": "silent_errors", + "load_from_db": false, + "name": "tool_placeholder", "placeholder": "", "required": false, "show": true, "title_case": false, - "tool_mode": false, + "tool_mode": true, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": false + "type": "str", + "value": "" } }, "tool_mode": false }, "showNode": true, - "type": "srf-identification" + "type": "Prompt" }, - "dragging": false, - "id": "srf-identification-ZaSBG", + "id": "Prompt-L80eY", "measured": { - "height": 196, + "height": 435, "width": 320 }, "position": { - "x": 1022.0233976555, - "y": 226.81239901066226 + "x": 3169.6615303340686, + "y": 484.8903382036367 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "json_array_filter-8satn", + "description": "Generate text using Azure OpenAI LLMs.", + "display_name": "Azure OpenAI", + "id": "AzureOpenAIModel-qewCW", "node": { "base_classes": [ - "Data" + "LanguageModel", + "Message" ], "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Filter JSON array based on field conditions", - "display_name": "JSON Array Filter", - "documentation": "http://docs.langflow.org/components/custom", + "description": "Generate text using Azure OpenAI LLMs.", + "display_name": "Azure OpenAI", + "documentation": "https://python.langchain.com/docs/integrations/llms/azure_openai", "edited": false, "field_order": [ - "input_array", - "field_name", - "operator", - "value", - "case_sensitive", - "return_only_files", - "file_field" + "input_value", + "system_message", + "stream", + "azure_endpoint", + "azure_deployment", + "api_key", + "api_version", + "temperature", + "max_tokens" ], "frozen": false, - "icon": "Filter", + "icon": "Azure", "legacy": false, - "lf_version": "1.4.3", - "metadata": {}, + "metadata": { + "keywords": [ + "model", + "llm", + "language model", + "large language model" + ] + }, "minimized": false, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Filtered Array", - "method": "filter_array", - "name": "filtered_array", - "selected": "Data", + "display_name": "Model Response", + "group_outputs": false, + "method": "text_response", + "name": "text_output", + "selected": "Message", "tool_mode": true, "types": [ - "Data" + "Message" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Language Model", + "group_outputs": false, + "method": "build_model", + "name": "model_output", + "selected": "LanguageModel", + "tool_mode": true, + "types": [ + "LanguageModel" ], "value": "__UNDEFINED__" } @@ -766,139 +957,172 @@ "pinned": false, "template": { "_type": "Component", - "case_sensitive": { - "_input_type": "BoolInput", + "api_key": { + "_input_type": "SecretStrInput", "advanced": false, - "display_name": "Case Sensitive", + "display_name": "Azure Chat OpenAI API Key", "dynamic": false, - "info": "Whether string comparisons should be case-sensitive", - "list": false, - "list_add_label": "Add More", - "name": "case_sensitive", + "info": "", + "input_types": [], + "load_from_db": true, + "name": "api_key", + "password": true, "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false + "type": "str", + "value": "" }, - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", + "api_version": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "API Version", + "dynamic": false, + "external_options": {}, "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, + "name": "api_version", + "options": [ + "2025-02-01-preview", + "2025-01-01-preview", + "2024-12-01-preview", + "2024-10-01-preview", + "2024-09-01-preview", + "2024-08-01-preview", + "2024-07-01-preview", + "2024-06-01", + "2024-03-01-preview", + "2024-02-15-preview", + "2023-12-01-preview", + "2023-05-15" + ], + "options_metadata": [], "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, - "type": "code", - "value": "from typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, Output, StrInput\nfrom langflow.schema import Data\nfrom loguru import logger\n\n\nclass JSONArrayFilterComponent(Component):\n \"\"\"Component for filtering JSON arrays based on field conditions.\"\"\"\n\n display_name = \"JSON Array Filter\"\n description = \"Filter JSON array based on field conditions\"\n documentation = \"http://docs.langflow.org/components/custom\"\n icon = \"Filter\"\n name = \"json_array_filter\"\n\n inputs = [\n HandleInput(\n name=\"input_array\",\n display_name=\"Input Array\",\n info=\"Input array of JSON objects to filter\",\n required=True,\n input_types=[\"Data\"],\n is_list=True,\n ),\n StrInput(\n name=\"field_name\",\n display_name=\"Field Name\",\n info=\"Name of the field to filter on (supports dot notation for nested fields)\",\n required=True,\n ),\n DropdownInput(\n name=\"operator\",\n display_name=\"Operator\",\n info=\"Operator to use for filtering\",\n options=[\n \"equals\",\n \"not_equals\",\n \"contains\",\n \"not_contains\",\n \"greater_than\",\n \"less_than\",\n \"in\",\n \"not_in\",\n ],\n value=\"equals\",\n ),\n StrInput(\n name=\"value\",\n display_name=\"Filter Value\",\n info=\"Value to filter against\",\n required=True,\n ),\n BoolInput(\n name=\"case_sensitive\",\n display_name=\"Case Sensitive\",\n info=\"Whether string comparisons should be case-sensitive\",\n value=False,\n ),\n BoolInput(\n name=\"return_only_files\",\n display_name=\"Return Only Files\",\n info=\"If true, returns only array of file URLs\",\n value=False,\n ),\n StrInput(\n name=\"file_field\",\n display_name=\"File Field Name\",\n info=\"Name of the field containing file URL (only used if return_only_files is true)\",\n required=False,\n value=\"file_path\",\n ),\n ]\n\n outputs = [\n Output(\n name=\"filtered_array\",\n display_name=\"Filtered Array\",\n method=\"filter_array\",\n )\n ]\n\n def _compare_values(\n self,\n field_value: Any,\n filter_value: Any,\n operator: str,\n case_sensitive: bool = False,\n ) -> bool:\n \"\"\"Compare two values based on the specified operator.\"\"\"\n try:\n # Handle None values\n if field_value is None:\n logger.debug(\"Field value is None\")\n return False\n\n # Log the types and values for debugging\n logger.debug(\n f\"Comparing - Field value: {field_value} ({type(field_value)}) | Filter value: {filter_value} ({type(filter_value)})\"\n )\n\n # Handle string comparisons with case sensitivity\n if (\n isinstance(field_value, str)\n and isinstance(filter_value, str)\n and not case_sensitive\n ):\n field_value = field_value.lower()\n filter_value = filter_value.lower()\n logger.debug(\n f\"Case-insensitive comparison: {field_value} vs {filter_value}\"\n )\n\n if operator == \"equals\":\n result = field_value == filter_value\n logger.debug(f\"Equals comparison result: {result}\")\n return result\n if operator == \"not_equals\":\n return field_value != filter_value\n if operator == \"contains\":\n return str(filter_value) in str(field_value)\n if operator == \"not_contains\":\n return str(filter_value) not in str(field_value)\n if operator == \"greater_than\":\n return float(field_value) > float(filter_value)\n if operator == \"less_than\":\n return float(field_value) < float(filter_value)\n if operator == \"in\":\n if not isinstance(filter_value, (list, tuple)):\n filter_value = [filter_value]\n return field_value in filter_value\n if operator == \"not_in\":\n if not isinstance(filter_value, (list, tuple)):\n filter_value = [filter_value]\n return field_value not in filter_value\n logger.warning(f\"Unsupported operator: {operator}\")\n return False\n\n except (ValueError, TypeError) as e:\n logger.warning(f\"Error comparing values: {e!s}\")\n return False\n\n def _get_nested_field_value(self, obj: dict[str, Any], field_path: str) -> Any:\n \"\"\"Get value from nested dictionary using dot notation.\"\"\"\n try:\n current = obj\n for part in field_path.split(\".\"):\n current = current[part]\n return current\n except (KeyError, TypeError):\n return None\n\n def filter_array(self) -> Data:\n \"\"\"Filter the input array based on the specified conditions.\"\"\"\n try:\n # Extract array from input\n if not self.input_array:\n raise ValueError(\"Input must not be empty\")\n\n input_array = None\n if isinstance(self.input_array, list) and len(self.input_array) > 0:\n if isinstance(self.input_array[0], Data):\n data_obj = self.input_array[0]\n if isinstance(data_obj.data, dict) and \"value\" in data_obj.data:\n input_array = data_obj.data[\"value\"]\n else:\n input_array = self.input_array\n elif isinstance(self.input_array, Data):\n if (\n isinstance(self.input_array.data, dict)\n and \"value\" in self.input_array.data\n ):\n input_array = self.input_array.data[\"value\"]\n else:\n input_array = self.input_array.value\n\n if input_array is None:\n logger.error(\"Could not extract array from input\")\n return Data(value=[])\n\n logger.debug(f\"Processing array with {len(input_array)} items\")\n\n filtered_array = []\n for item in input_array:\n if not isinstance(item, dict):\n logger.debug(f\"Skipping non-dict item: {item}\")\n continue\n\n field_value = self._get_nested_field_value(item, self.field_name)\n if self._compare_values(\n field_value, self.value, self.operator, self.case_sensitive\n ):\n filtered_array.append(item)\n\n logger.info(\n f\"Filtered array from {len(input_array)} to {len(filtered_array)} items\"\n )\n\n # If return_only_files is true, extract just the file URLs\n if self.return_only_files and filtered_array:\n file_field = self.file_field or \"file\"\n files_array = []\n for item in filtered_array:\n file_url = self._get_nested_field_value(item, file_field)\n if file_url:\n files_array.append(file_url)\n logger.info(f\"Extracted {len(files_array)} file URLs\")\n return Data(data={\"file_path\": files_array})\n\n return Data(data={\"file_path\": filtered_array})\n\n except Exception as e:\n logger.error(f\"Error filtering array: {e!s}\")\n raise ValueError(f\"Error filtering array: {e!s}\")\n" + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "2024-06-01" }, - "field_name": { - "_input_type": "StrInput", + "azure_deployment": { + "_input_type": "MessageTextInput", "advanced": false, - "display_name": "Field Name", + "display_name": "Deployment Name", "dynamic": false, - "info": "Name of the field to filter on (supports dot notation for nested fields)", + "info": "", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "field_name", + "name": "azure_deployment", "placeholder": "", "required": true, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "prediction" + "value": "GPT316k" }, - "file_field": { - "_input_type": "StrInput", + "azure_endpoint": { + "_input_type": "MessageTextInput", "advanced": false, - "display_name": "File Field Name", + "display_name": "Azure Endpoint", "dynamic": false, - "info": "Name of the field containing file URL (only used if return_only_files is true)", + "info": "Your Azure endpoint, including the resource. Example: `https://example-resource.azure.openai.com/`", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "file_field", + "name": "azure_endpoint", "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "file_path" + "value": "https://cog-54p2emd7pu2vu.openai.azure.com/" }, - "input_array": { - "_input_type": "HandleInput", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langchain_openai import AzureChatOpenAI\n\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import MessageTextInput\nfrom langflow.io import DropdownInput, IntInput, SecretStrInput, SliderInput\n\n\nclass AzureChatOpenAIComponent(LCModelComponent):\n display_name: str = \"Azure OpenAI\"\n description: str = \"Generate text using Azure OpenAI LLMs.\"\n documentation: str = \"https://python.langchain.com/docs/integrations/llms/azure_openai\"\n beta = False\n icon = \"Azure\"\n name = \"AzureOpenAIModel\"\n\n AZURE_OPENAI_API_VERSIONS = [\n \"2024-06-01\",\n \"2024-07-01-preview\",\n \"2024-08-01-preview\",\n \"2024-09-01-preview\",\n \"2024-10-01-preview\",\n \"2023-05-15\",\n \"2023-12-01-preview\",\n \"2024-02-15-preview\",\n \"2024-03-01-preview\",\n \"2024-12-01-preview\",\n \"2025-01-01-preview\",\n \"2025-02-01-preview\",\n ]\n\n inputs = [\n *LCModelComponent._base_inputs,\n MessageTextInput(\n name=\"azure_endpoint\",\n display_name=\"Azure Endpoint\",\n info=\"Your Azure endpoint, including the resource. Example: `https://example-resource.azure.openai.com/`\",\n required=True,\n ),\n MessageTextInput(name=\"azure_deployment\", display_name=\"Deployment Name\", required=True),\n SecretStrInput(name=\"api_key\", display_name=\"Azure Chat OpenAI API Key\", required=True),\n DropdownInput(\n name=\"api_version\",\n display_name=\"API Version\",\n options=sorted(AZURE_OPENAI_API_VERSIONS, reverse=True),\n value=next(\n (\n version\n for version in sorted(AZURE_OPENAI_API_VERSIONS, reverse=True)\n if not version.endswith(\"-preview\")\n ),\n AZURE_OPENAI_API_VERSIONS[0],\n ),\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.7,\n range_spec=RangeSpec(min=0, max=2, step=0.01),\n info=\"Controls randomness. Lower values are more deterministic, higher values are more creative.\",\n advanced=True,\n ),\n IntInput(\n name=\"max_tokens\",\n display_name=\"Max Tokens\",\n advanced=True,\n info=\"The maximum number of tokens to generate. Set to 0 for unlimited tokens.\",\n ),\n ]\n\n def build_model(self) -> LanguageModel: # type: ignore[type-var]\n azure_endpoint = self.azure_endpoint\n azure_deployment = self.azure_deployment\n api_version = self.api_version\n api_key = self.api_key\n temperature = self.temperature\n max_tokens = self.max_tokens\n stream = self.stream\n\n try:\n output = AzureChatOpenAI(\n azure_endpoint=azure_endpoint,\n azure_deployment=azure_deployment,\n api_version=api_version,\n api_key=api_key,\n temperature=temperature,\n max_tokens=max_tokens or None,\n streaming=stream,\n )\n except Exception as e:\n msg = f\"Could not connect to AzureOpenAI API: {e}\"\n raise ValueError(msg) from e\n\n return output\n" + }, + "input_value": { + "_input_type": "MessageInput", "advanced": false, - "display_name": "Input Array", + "display_name": "Input", "dynamic": false, - "info": "Input array of JSON objects to filter", + "info": "", "input_types": [ - "Data" + "Message" ], - "list": true, + "list": false, "list_add_label": "Add More", - "name": "input_array", + "load_from_db": false, + "name": "input_value", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, + "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "other", + "type": "str", "value": "" }, - "operator": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Operator", + "max_tokens": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Tokens", "dynamic": false, - "info": "Operator to use for filtering", - "name": "operator", - "options": [ - "equals", - "not_equals", - "contains", - "not_contains", - "greater_than", - "less_than", - "in", - "not_in" - ], - "options_metadata": [], + "info": "The maximum number of tokens to generate. Set to 0 for unlimited tokens.", + "list": false, + "list_add_label": "Add More", + "name": "max_tokens", "placeholder": "", "required": false, "show": true, "title_case": false, - "toggle": false, "tool_mode": false, "trace_as_metadata": true, - "type": "str", - "value": "equals" + "type": "int", + "value": "" }, - "return_only_files": { + "stream": { "_input_type": "BoolInput", - "advanced": false, - "display_name": "Return Only Files", + "advanced": true, + "display_name": "Stream", "dynamic": false, - "info": "If true, returns only array of file URLs", + "info": "Stream the response from the model. Streaming works only in Chat.", "list": false, "list_add_label": "Add More", - "name": "return_only_files", + "name": "stream", "placeholder": "", "required": false, "show": true, @@ -906,70 +1130,101 @@ "tool_mode": false, "trace_as_metadata": true, "type": "bool", - "value": true + "value": false }, - "value": { - "_input_type": "StrInput", + "system_message": { + "_input_type": "MultilineInput", "advanced": false, - "display_name": "Filter Value", + "copy_field": false, + "display_name": "System Message", "dynamic": false, - "info": "Value to filter against", + "info": "System message to pass to the model.", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", "load_from_db": false, - "name": "value", + "multiline": true, + "name": "system_message", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "srf" + "value": "You are an AI assistant known for your accuracy and helpfulness. Carefully review the following clinical guidelines that are used to approve procedures for various medical conditions. Follow these specific instructions: 1. You must generate exactly **eight (8)** guidelines — no more, no less. 2. If there are **more than eight** guidelines in the context, **combine** and merge them logically so that the final output always contains **eight guidelines**. 3. If there are **fewer than eight** guidelines, **split** or expand them appropriately, ensuring that each original guideline is still represented, and the final count remains **eight guidelines**. 4. Each guideline MUST be **standalone** and **independent** and in a single line. 5. **Every guideline** provided in the context must be considered to answer the question. **Do not skip** or omit any. Ensure your final output always contains exactly **eight comprehensive guidelines**." + }, + "temperature": { + "_input_type": "SliderInput", + "advanced": true, + "display_name": "Temperature", + "dynamic": false, + "info": "Controls randomness. Lower values are more deterministic, higher values are more creative.", + "max_label": "", + "max_label_icon": "", + "min_label": "", + "min_label_icon": "", + "name": "temperature", + "placeholder": "", + "range_spec": { + "max": 2, + "min": 0, + "step": 0.01, + "step_type": "float" + }, + "required": false, + "show": true, + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "title_case": false, + "tool_mode": false, + "type": "slider", + "value": 0.7 } }, "tool_mode": false }, + "selected_output": "text_output", "showNode": true, - "type": "json_array_filter" + "type": "AzureOpenAIModel" }, - "dragging": false, - "id": "json_array_filter-8satn", + "id": "AzureOpenAIModel-qewCW", "measured": { - "height": 635, + "height": 616, "width": 320 }, "position": { - "x": 1448.7862802792056, - "y": 136.060018687435 + "x": 3830.6986185110973, + "y": 538.0309378753839 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "srf-extraction-S3eHF", + "description": "Sends text output via API.", + "display_name": "Text Output", + "id": "TextOutput-rUiuT", "node": { "base_classes": [ - "Data" + "Message" ], "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Model for extracting SRF from medical images", - "display_name": "SRF Extraction", - "documentation": "http://docs.langflow.org/components/custom", + "description": "Sends text output via API.", + "display_name": "Text Output", + "documentation": "https://docs.langflow.org/components-io#text-output", "edited": false, "field_order": [ - "file_path", - "silent_errors", - "delete_server_file_after_processing", - "ignore_unsupported_extensions", - "ignore_unspecified_files", - "file_path" + "input_value" ], "frozen": false, - "icon": "Autonomize", + "icon": "type", "legacy": false, "metadata": {}, "minimized": false, @@ -978,16 +1233,14 @@ { "allows_loop": false, "cache": true, - "display_name": "SRF Extraction", - "hidden": null, - "method": "process_srf", - "name": "srfExtraction", - "options": null, - "required_inputs": [], - "selected": "Data", + "display_name": "Output Text", + "group_outputs": false, + "method": "text_response", + "name": "text", + "selected": "Message", "tool_mode": true, "types": [ - "Data" + "Message" ], "value": "__UNDEFINED__" } @@ -1011,123 +1264,158 @@ "show": true, "title_case": false, "type": "code", - "value": "import os\nimport tempfile\nfrom pathlib import Path\nfrom typing import Any\nfrom urllib.parse import urlparse\n\nimport aiohttp\nfrom loguru import logger\n\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.base.data.utils import IMG_FILE_TYPES\nfrom app.base.modelhub import ATModelComponent\nfrom langflow.io import Output, HandleInput\nfrom langflow.schema import Data\nfrom app.services.modelhub.model_endpoint import ModelEndpoint\n\n\nclass SRFExtractionComponent(\n ATModelComponent, BaseFileComponent\n): # Changed inheritance order\n display_name = \"SRF Extraction\"\n description = \"Model for extracting SRF from medical images\"\n documentation: str = \"http://docs.langflow.org/components/custom\"\n icon = \"Autonomize\"\n name = \"srf-extraction\"\n _model_name = ModelEndpoint.SRF_EXTRACTION\n\n VALID_EXTENSIONS = IMG_FILE_TYPES\n\n inputs = [\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n HandleInput(\n name=\"file_path\",\n display_name=\"URL\",\n info=(\n \"Upload file via URL or local server path. Supports: \\n\"\n \"1. Direct HTTP/HTTPS URLs for remote files\\n\"\n \"2. Local server file paths\\n\"\n \"3. Data objects with file path property\\n\"\n \"4. Message objects containing file paths\\n\"\n \"\\nSupports the same file types as the Path input. \"\n \"Takes precedence over Path input when both are provided.\"\n ),\n required=False,\n input_types=[\"Data\", \"Message\"],\n is_list=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"srfExtraction\", display_name=\"SRF Extraction\", method=\"process_srf\"\n ),\n ]\n\n # Explicitly define the endpoint\n def __init__(self, **kwargs):\n ATModelComponent.__init__(self, **kwargs) # Initialize ATModelComponent first\n BaseFileComponent.__init__(self, **kwargs) # Then initialize BaseFileComponent\n self._modelhub_service = None\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n self._endpoint = \"https://tolstoy-v2.modelhub.sprint.autonomize.dev/v1/models/extraction:predict\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n filename = os.path.basename(urlparse(url).path)\n if not filename:\n filename = \"downloaded_image.png\"\n\n local_path = os.path.join(self.temp_dir, filename)\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n async def _validate_and_resolve_paths_async(\n self,\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths asynchronously.\"\"\"\n resolved_files = []\n file_paths = self._file_path_as_list()\n\n for obj in file_paths:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Handle if server_file_path is a list\n paths_to_process = (\n server_file_path\n if isinstance(server_file_path, list)\n else [server_file_path]\n )\n\n for path in paths_to_process:\n try:\n # Check if it's a URL\n if isinstance(path, str) and path.startswith(\n (\"http://\", \"https://\")\n ):\n local_path = await self._download_file_from_url(path)\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": path,\n }\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=True,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(path)))\n if not resolved_path.exists():\n msg = f\"File not found: {path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n except Exception as e:\n logger.error(f\"Error processing file paths: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n return resolved_files\n\n # async def _read_file(self, file_path: Path) -> bytes:\n # \"\"\"Read file content from a file path\"\"\"\n # try:\n # with open(file_path, \"rb\") as f:\n # return f.read()\n # except Exception as e:\n # logger.error(f\"Error reading file {file_path}: {e!s}\")\n # if not self.silent_errors:\n # raise ValueError(f\"Error reading file: {e!s}\") from e\n # return None\n\n async def extract_srf(self, file_path: Path) -> Any:\n \"\"\"Extract SRF from the image\"\"\"\n try:\n logger.debug(\"Sending request to model endpoint\")\n # response = await self.predict(\n # endpoint=self._endpoint,\n # binary_data=image_data,\n # content_type=\"image/png\"\n # )\n response = await self.predict(\n endpoint=self._endpoint, file_path=file_path, content_type=\"image/png\"\n )\n logger.debug(f\"Model response: {response}\")\n return response\n except Exception as e:\n logger.error(f\"Error in SRF extraction: {e!s}\")\n if not self.silent_errors:\n raise ValueError(f\"Error in SRF extraction: {e!s}\") from e\n return None\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process the files as required by BaseFileComponent\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n if not self.silent_errors:\n raise ValueError(msg)\n logger.warning(msg)\n return file_list\n\n async def process_srf(self) -> Data:\n \"\"\"Process multiple images and generate merged SRF extraction results\"\"\"\n try:\n # Use async validation that handles URLs\n files = await self._validate_and_resolve_paths_async()\n if not files:\n msg = \"No valid files provided\"\n logger.warning(msg)\n if not self.silent_errors:\n raise ValueError(msg)\n return Data(value={})\n\n combined_results = {} # For storing merged results\n results = []\n\n for file in files:\n try:\n # Read the file\n # image_data = await self._read_file(file.path)\n # if not image_data:\n # msg = f\"Could not read file: {file.path}\"\n # logger.warning(msg)\n # continue\n\n # Extract SRF from the image\n result = await self.extract_srf(file.path)\n logger.debug(f\"Model result: {result}\")\n\n if result and isinstance(result, dict):\n original_url = file.data[0].data.get(\n \"original_url\", str(file.path)\n )\n\n # Extract Doc_Info, handling different response structures\n doc_info = {}\n if \"data\" in result:\n if isinstance(result[\"data\"], dict):\n if (\n \"data\" in result[\"data\"]\n and \"Doc_Info\" in result[\"data\"][\"data\"]\n ):\n doc_info = result[\"data\"][\"data\"][\"Doc_Info\"]\n elif \"Doc_Info\" in result[\"data\"]:\n doc_info = result[\"data\"][\"Doc_Info\"]\n else:\n doc_info = {\"extraction\": result[\"data\"]}\n\n logger.debug(f\"Extracted doc_info: {doc_info}\")\n\n # Merge Doc_Info with combined results\n for key, value in doc_info.items():\n # If key doesn't exist or current value is empty/null, use new value\n if key not in combined_results or combined_results[key] in (\n None,\n \"\",\n \"null\",\n \"Information not found\",\n \"Information not\",\n ):\n combined_results[key] = value\n logger.debug(\n f\"Added/Updated key {key} with value {value}\"\n )\n\n # Store individual result\n results.append(\n {\n \"file_path\": original_url,\n \"prediction\": {\"Doc_Info\": doc_info},\n }\n )\n\n # Handle cleanup\n if file.delete_after_processing and file.path.exists():\n try:\n os.remove(file.path)\n except Exception as e:\n logger.warning(f\"Failed to delete file {file.path}: {e!s}\")\n\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n continue\n logger.warning(\"Continuing to next file due to silent_errors=True\")\n\n if not results:\n msg = \"No successful extractions\"\n logger.warning(msg)\n return Data(value={})\n\n # Return both combined results and individual results\n logger.info(f\"Successfully processed {len(results)} files\")\n # final_result = {\n # \"combined_results\": combined_results,\n # \"results\": results\n # }\n logger.debug(f\"Final result: {combined_results}\")\n return Data(value=combined_results)\n\n except Exception as e:\n logger.error(f\"Error processing images: {e!s}\")\n if not self.silent_errors:\n raise ValueError(f\"Error processing images: {e!s}\")\n return Data(value={})\n\n def __del__(self):\n \"\"\"Clean up temporary files.\"\"\"\n if (\n hasattr(self, \"temp_dir\")\n and self.temp_dir\n and os.path.exists(self.temp_dir)\n ):\n try:\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.remove(file_path)\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" + "value": "from langflow.base.io.text import TextComponent\nfrom langflow.io import MultilineInput, Output\nfrom langflow.schema.message import Message\n\n\nclass TextOutputComponent(TextComponent):\n display_name = \"Text Output\"\n description = \"Sends text output via API.\"\n documentation: str = \"https://docs.langflow.org/components-io#text-output\"\n icon = \"type\"\n name = \"TextOutput\"\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Text to be passed as output.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n message = Message(\n text=self.input_value,\n )\n self.status = self.input_value\n return message\n" }, - "delete_server_file_after_processing": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Delete Server File After Processing", - "dynamic": false, - "info": "If true, the Server File Path will be deleted after processing.", - "list": false, - "list_add_label": "Add More", - "name": "delete_server_file_after_processing", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "file_path": { - "_input_type": "HandleInput", + "input_value": { + "_input_type": "MultilineInput", "advanced": false, - "display_name": "URL", + "copy_field": false, + "display_name": "Inputs", "dynamic": false, - "info": "Upload file via URL or local server path. Supports: \n1. Direct HTTP/HTTPS URLs for remote files\n2. Local server file paths\n3. Data objects with file path property\n4. Message objects containing file paths\n\nSupports the same file types as the Path input. Takes precedence over Path input when both are provided.", + "info": "Text to be passed as output.", "input_types": [ - "Data", "Message" ], - "list": true, - "list_add_label": "Add More", - "name": "file_path", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "trace_as_metadata": true, - "type": "other", - "value": "" - }, - "ignore_unspecified_files": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Ignore Unspecified Files", - "dynamic": false, - "info": "If true, Data with no 'file_path' property will be ignored.", "list": false, "list_add_label": "Add More", - "name": "ignore_unspecified_files", + "load_from_db": false, + "multiline": true, + "name": "input_value", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "ignore_unsupported_extensions": { - "_input_type": "BoolInput", + "type": "str", + "value": "" + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "TextOutput" + }, + "id": "TextOutput-rUiuT", + "measured": { + "height": 204, + "width": 320 + }, + "position": { + "x": 4477.514419223873, + "y": 451.1728160653827 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "TextInput-vut4E", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Get user text inputs.", + "display_name": "Text Input", + "documentation": "https://docs.langflow.org/components-io#text-input", + "edited": false, + "field_order": [ + "input_value" + ], + "frozen": false, + "icon": "type", + "legacy": false, + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Output Text", + "group_outputs": false, + "method": "text_response", + "name": "text", + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "code": { "advanced": true, - "display_name": "Ignore Unsupported Extensions", - "dynamic": false, - "info": "If true, files with unsupported extensions will not be processed.", + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", "list": false, - "list_add_label": "Add More", - "name": "ignore_unsupported_extensions", + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "code", + "value": "from langflow.base.io.text import TextComponent\nfrom langflow.io import MultilineInput, Output\nfrom langflow.schema.message import Message\n\n\nclass TextInputComponent(TextComponent):\n display_name = \"Text Input\"\n description = \"Get user text inputs.\"\n documentation: str = \"https://docs.langflow.org/components-io#text-input\"\n icon = \"type\"\n name = \"TextInput\"\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Text to be passed as input.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n return Message(\n text=self.input_value,\n )\n" }, - "silent_errors": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Silent Errors", + "input_value": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Text", "dynamic": false, - "info": "If true, errors will not raise an exception.", + "info": "Text to be passed as input.", + "input_types": [ + "Message" + ], "list": false, "list_add_label": "Add More", - "name": "silent_errors", + "load_from_db": false, + "multiline": true, + "name": "input_value", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": false + "type": "str", + "value": "" } }, "tool_mode": false }, "showNode": true, - "type": "srf-extraction" + "type": "TextInput" }, "dragging": false, - "id": "srf-extraction-S3eHF", + "id": "TextInput-vut4E", "measured": { - "height": 196, + "height": 204, "width": 320 }, "position": { - "x": 1882.085721576359, - "y": 366.5036233101823 + "x": 767.7455374483175, + "y": 292.9490078710027 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "JSONOutput-Kpx5X", + "id": "TextInput-gMfph", "node": { "base_classes": [ "Message" @@ -1135,16 +1423,15 @@ "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Display input data as JSON in the Playground.", - "display_name": "JSON Output", - "documentation": "", + "description": "Get user text inputs.", + "display_name": "Text Input", + "documentation": "https://docs.langflow.org/components-io#text-input", "edited": false, "field_order": [ - "data", - "pretty_print" + "input_value" ], "frozen": false, - "icon": "Braces", + "icon": "type", "legacy": false, "metadata": {}, "minimized": false, @@ -1153,10 +1440,10 @@ { "allows_loop": false, "cache": true, - "display_name": "JSON", + "display_name": "Output Text", "group_outputs": false, - "method": "json_response", - "name": "json", + "method": "text_response", + "name": "text", "selected": "Message", "tool_mode": true, "types": [ @@ -1184,20 +1471,23 @@ "show": true, "title_case": false, "type": "code", - "value": "import json\n\nfrom langflow.base.io.text import TextComponent\nfrom langflow.inputs import DataInput\nfrom langflow.io import BoolInput, Output\nfrom langflow.schema import Data\nfrom langflow.schema.message import Message\n\n\nclass JSONOutputComponent(TextComponent):\n display_name = \"JSON Output\"\n description = \"Display input data as JSON in the Playground.\"\n icon = \"Braces\"\n name = \"JSONOutput\"\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to JSON.\",\n is_list=True,\n ),\n BoolInput(\n name=\"pretty_print\",\n display_name=\"Pretty Print\",\n info=\"Format JSON with proper indentation\",\n value=True,\n advanced=True,\n ),\n ]\n outputs = [\n Output(display_name=\"JSON\", name=\"json\", method=\"json_response\"),\n ]\n\n def _process_data(self, data: Data | list[Data]) -> dict | list:\n \"\"\"Convert Data object(s) to dictionary/list format.\"\"\"\n if isinstance(data, list):\n return [item.dict() for item in data]\n return data.dict()\n\n def json_response(self) -> Message:\n try:\n # Process the Data input\n processed_data = self._process_data(self.data)\n\n # Convert to JSON string with optional pretty printing\n if self.pretty_print:\n formatted_json = json.dumps(\n processed_data, indent=2, ensure_ascii=False\n )\n else:\n formatted_json = json.dumps(processed_data, ensure_ascii=False)\n\n message = Message(text=formatted_json)\n self.status = formatted_json\n return message\n\n except Exception as e:\n error_message = f\"Error processing data to JSON: {e!s}\"\n message = Message(text=error_message)\n self.status = error_message\n return message\n" + "value": "from langflow.base.io.text import TextComponent\nfrom langflow.io import MultilineInput, Output\nfrom langflow.schema.message import Message\n\n\nclass TextInputComponent(TextComponent):\n display_name = \"Text Input\"\n description = \"Get user text inputs.\"\n documentation: str = \"https://docs.langflow.org/components-io#text-input\"\n icon = \"type\"\n name = \"TextInput\"\n\n inputs = [\n MultilineInput(\n name=\"input_value\",\n display_name=\"Text\",\n info=\"Text to be passed as input.\",\n ),\n ]\n outputs = [\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\n ]\n\n def text_response(self) -> Message:\n return Message(\n text=self.input_value,\n )\n" }, - "data": { - "_input_type": "DataInput", + "input_value": { + "_input_type": "MultilineInput", "advanced": false, - "display_name": "Data", + "copy_field": false, + "display_name": "Text", "dynamic": false, - "info": "The data to convert to JSON.", + "info": "Text to be passed as input.", "input_types": [ - "Data" + "Message" ], - "list": true, + "list": false, "list_add_label": "Add More", - "name": "data", + "load_from_db": false, + "multiline": true, + "name": "input_value", "placeholder": "", "required": false, "show": true, @@ -1205,59 +1495,43 @@ "tool_mode": false, "trace_as_input": true, "trace_as_metadata": true, - "type": "other", + "type": "str", "value": "" - }, - "pretty_print": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Pretty Print", - "dynamic": false, - "info": "Format JSON with proper indentation", - "list": false, - "list_add_label": "Add More", - "name": "pretty_print", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true } }, "tool_mode": false }, "showNode": true, - "type": "JSONOutput" + "type": "TextInput" }, "dragging": false, - "id": "JSONOutput-Kpx5X", + "id": "TextInput-gMfph", "measured": { - "height": 196, + "height": 204, "width": 320 }, "position": { - "x": 2406.014845365142, - "y": 338.50655048672877 + "x": 816.1214718555798, + "y": 615.4552372527512 }, - "selected": true, + "selected": false, "type": "genericNode" } ], "viewport": { - "x": -582.7254771769976, - "y": 146.34836116613457, - "zoom": 0.596265873903947 + "x": -1725.2610662596394, + "y": -53.59018771911724, + "zoom": 0.5542806494779478 } }, - "description": "Prior Auth Form Extraction Agent", + "description": "Language Models, Unleashed.", "endpoint_name": null, + "id": "ca41a46b-01ca-48af-a6f9-ba77920af7b6", "is_component": false, - "last_tested_version": "1.4.3", - "name": "Prior Auth Form Extraction Agent", + "last_tested_version": "1.6.3", + "name": "Prior Auth Extraction ", "tags": [ - "prior-auth" + "prior-auth", + "chart-review" ] } \ No newline at end of file diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Relation Extraction Agent.json b/src/backend/base/langflow/initial_setup/starter_projects/Relation Extraction Agent.json index 0fa3ade595a1..9e048dc02eff 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Relation Extraction Agent.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Relation Extraction Agent.json @@ -6,228 +6,204 @@ "className": "", "data": { "sourceHandle": { - "dataType": "ParseData", - "id": "ParseData-u3h2k", - "name": "text", + "dataType": "ParserComponent", + "id": "ParserComponent-SJrwu", + "name": "parsed_text", "output_types": [ "Message" ] }, "targetHandle": { - "fieldName": "search_query", - "id": "ClinicalLLM-Q2Siz", + "fieldName": "input_value", + "id": "Agent-d22BR", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-ParseData-u3h2k{œdataTypeœ:œParseDataœ,œidœ:œParseData-u3h2kœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-ClinicalLLM-Q2Siz{œfieldNameœ:œsearch_queryœ,œidœ:œClinicalLLM-Q2Sizœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-ParserComponent-SJrwu{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-SJrwuœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Agent-d22BR{œfieldNameœ:œinput_valueœ,œidœ:œAgent-d22BRœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "ParseData-u3h2k", - "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-u3h2kœ, œnameœ: œtextœ, œoutput_typesœ: [œMessageœ]}", - "target": "ClinicalLLM-Q2Siz", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œClinicalLLM-Q2Sizœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "ParserComponent-SJrwu", + "sourceHandle": "{œdataTypeœ:œParserComponentœ,œidœ:œParserComponent-SJrwuœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}", + "target": "Agent-d22BR", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œAgent-d22BRœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "ClinicalLLM", - "id": "ClinicalLLM-Q2Siz", - "name": "prediction", + "dataType": "AutonomizeModel", + "id": "AutonomizeModel-PPDxo", + "name": "component_as_tool", "output_types": [ - "Data" + "Tool" ] }, "targetHandle": { - "fieldName": "data", - "id": "ParseData-gZwky", + "fieldName": "tools", + "id": "Agent-d22BR", "inputTypes": [ - "Data" + "Tool" ], "type": "other" } }, - "id": "reactflow__edge-ClinicalLLM-Q2Siz{œdataTypeœ:œClinicalLLMœ,œidœ:œClinicalLLM-Q2Sizœ,œnameœ:œpredictionœ,œoutput_typesœ:[œDataœ]}-ParseData-gZwky{œfieldNameœ:œdataœ,œidœ:œParseData-gZwkyœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-AutonomizeModel-PPDxo{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-PPDxoœ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}-Agent-d22BR{œfieldNameœ:œtoolsœ,œidœ:œAgent-d22BRœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}", "selected": false, - "source": "ClinicalLLM-Q2Siz", - "sourceHandle": "{œdataTypeœ: œClinicalLLMœ, œidœ: œClinicalLLM-Q2Sizœ, œnameœ: œpredictionœ, œoutput_typesœ: [œDataœ]}", - "target": "ParseData-gZwky", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-gZwkyœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "source": "AutonomizeModel-PPDxo", + "sourceHandle": "{œdataTypeœ:œAutonomizeModelœ,œidœ:œAutonomizeModel-PPDxoœ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}", + "target": "Agent-d22BR", + "targetHandle": "{œfieldNameœ:œtoolsœ,œidœ:œAgent-d22BRœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "File Path", - "id": "File Path-8jQBJ", - "name": "file_path", + "dataType": "Agent", + "id": "Agent-d22BR", + "name": "response", "output_types": [ - "Data" + "Message" ] }, "targetHandle": { - "fieldName": "file_path", - "id": "azure_ocr-KoHV8", + "fieldName": "input_value", + "id": "ChatOutput-jYmRf", "inputTypes": [ "Data", + "DataFrame", "Message" ], "type": "other" } }, - "id": "reactflow__edge-File Path-8jQBJ{œdataTypeœ:œFile Pathœ,œidœ:œFile Path-8jQBJœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}-azure_ocr-KoHV8{œfieldNameœ:œfile_pathœ,œidœ:œazure_ocr-KoHV8œ,œinputTypesœ:[œDataœ,œMessageœ],œtypeœ:œotherœ}", - "source": "File Path-8jQBJ", - "sourceHandle": "{œdataTypeœ: œFile Pathœ, œidœ: œFile Path-8jQBJœ, œnameœ: œfile_pathœ, œoutput_typesœ: [œDataœ]}", - "target": "azure_ocr-KoHV8", - "targetHandle": "{œfieldNameœ: œfile_pathœ, œidœ: œazure_ocr-KoHV8œ, œinputTypesœ: [œDataœ, œMessageœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-Agent-d22BR{œdataTypeœ:œAgentœ,œidœ:œAgent-d22BRœ,œnameœ:œresponseœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-jYmRf{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-jYmRfœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}", + "selected": false, + "source": "Agent-d22BR", + "sourceHandle": "{œdataTypeœ:œAgentœ,œidœ:œAgent-d22BRœ,œnameœ:œresponseœ,œoutput_typesœ:[œMessageœ]}", + "target": "ChatOutput-jYmRf", + "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-jYmRfœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "azure_ocr", - "id": "azure_ocr-KoHV8", + "dataType": "AzureDocumentIntelligence", + "id": "AzureDocumentIntelligence-J3BuB", "name": "structured_data", "output_types": [ - "Data" - ] - }, - "targetHandle": { - "fieldName": "data", - "id": "ParseData-u3h2k", - "inputTypes": [ - "Data" - ], - "type": "other" - } - }, - "id": "reactflow__edge-azure_ocr-KoHV8{œdataTypeœ:œazure_ocrœ,œidœ:œazure_ocr-KoHV8œ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataœ]}-ParseData-u3h2k{œfieldNameœ:œdataœ,œidœ:œParseData-u3h2kœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "azure_ocr-KoHV8", - "sourceHandle": "{œdataTypeœ: œazure_ocrœ, œidœ: œazure_ocr-KoHV8œ, œnameœ: œstructured_dataœ, œoutput_typesœ: [œDataœ]}", - "target": "ParseData-u3h2k", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œParseData-u3h2kœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "ParseData", - "id": "ParseData-gZwky", - "name": "data_list", - "output_types": [ - "Data" + "DataFrame" ] }, "targetHandle": { - "fieldName": "data", - "id": "RelationExtraction-TvyqO", + "fieldName": "input_data", + "id": "ParserComponent-SJrwu", "inputTypes": [ + "DataFrame", "Data" ], "type": "other" } }, - "id": "xy-edge__ParseData-gZwky{œdataTypeœ:œParseDataœ,œidœ:œParseData-gZwkyœ,œnameœ:œdata_listœ,œoutput_typesœ:[œDataœ]}-RelationExtraction-TvyqO{œfieldNameœ:œdataœ,œidœ:œRelationExtraction-TvyqOœ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "ParseData-gZwky", - "sourceHandle": "{œdataTypeœ: œParseDataœ, œidœ: œParseData-gZwkyœ, œnameœ: œdata_listœ, œoutput_typesœ: [œDataœ]}", - "target": "RelationExtraction-TvyqO", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œRelationExtraction-TvyqOœ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-AzureDocumentIntelligence-J3BuB{œdataTypeœ:œAzureDocumentIntelligenceœ,œidœ:œAzureDocumentIntelligence-J3BuBœ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataFrameœ]}-ParserComponent-SJrwu{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-SJrwuœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "selected": false, + "source": "AzureDocumentIntelligence-J3BuB", + "sourceHandle": "{œdataTypeœ:œAzureDocumentIntelligenceœ,œidœ:œAzureDocumentIntelligence-J3BuBœ,œnameœ:œstructured_dataœ,œoutput_typesœ:[œDataFrameœ]}", + "target": "ParserComponent-SJrwu", + "targetHandle": "{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-SJrwuœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}" }, { "animated": false, "className": "", "data": { "sourceHandle": { - "dataType": "RelationExtraction", - "id": "RelationExtraction-TvyqO", - "name": "data_list", + "dataType": "BlobStorage", + "id": "BlobStorage-Yg2WN", + "name": "file_path", "output_types": [ "Data" ] }, "targetHandle": { - "fieldName": "data", - "id": "JSONOutput-Ru3z5", + "fieldName": "url", + "id": "AzureDocumentIntelligence-J3BuB", "inputTypes": [ - "Data" + "str", + "Data", + "Message", + "list" ], "type": "other" } }, - "id": "xy-edge__RelationExtraction-TvyqO{œdataTypeœ:œRelationExtractionœ,œidœ:œRelationExtraction-TvyqOœ,œnameœ:œdata_listœ,œoutput_typesœ:[œDataœ]}-JSONOutput-Ru3z5{œfieldNameœ:œdataœ,œidœ:œJSONOutput-Ru3z5œ,œinputTypesœ:[œDataœ],œtypeœ:œotherœ}", - "source": "RelationExtraction-TvyqO", - "sourceHandle": "{œdataTypeœ: œRelationExtractionœ, œidœ: œRelationExtraction-TvyqOœ, œnameœ: œdata_listœ, œoutput_typesœ: [œDataœ]}", - "target": "JSONOutput-Ru3z5", - "targetHandle": "{œfieldNameœ: œdataœ, œidœ: œJSONOutput-Ru3z5œ, œinputTypesœ: [œDataœ], œtypeœ: œotherœ}" + "id": "reactflow__edge-BlobStorage-Yg2WN{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-Yg2WNœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}-AzureDocumentIntelligence-J3BuB{œfieldNameœ:œurlœ,œidœ:œAzureDocumentIntelligence-J3BuBœ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}", + "selected": false, + "source": "BlobStorage-Yg2WN", + "sourceHandle": "{œdataTypeœ:œBlobStorageœ,œidœ:œBlobStorage-Yg2WNœ,œnameœ:œfile_pathœ,œoutput_typesœ:[œDataœ]}", + "target": "AzureDocumentIntelligence-J3BuB", + "targetHandle": "{œfieldNameœ:œurlœ,œidœ:œAzureDocumentIntelligence-J3BuBœ,œinputTypesœ:[œstrœ,œDataœ,œMessageœ,œlistœ],œtypeœ:œotherœ}" } ], "nodes": [ { "data": { - "id": "ParseData-u3h2k", + "id": "AzureDocumentIntelligence-J3BuB", "node": { "base_classes": [ - "Data", - "Message" + "DataFrame" ], "beta": false, + "category": "models", "conditional_paths": [], "custom_fields": {}, - "description": "Convert Data objects into Messages using any {field_name} from input data.", - "display_name": "Parse Data", - "documentation": "", + "description": "Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis", + "display_name": "Azure Document Intelligence", + "documentation": "https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/", "edited": false, "field_order": [ - "data", - "template", - "sep" + "url", + "file_path", + "silent_errors", + "delete_server_file_after_processing", + "ignore_unsupported_extensions", + "ignore_unspecified_files", + "model_type", + "extract_tables", + "include_confidence", + "use_multithreading", + "concurrency_multithreading" ], "frozen": false, - "icon": "message-square", - "legacy": true, - "lf_version": "1.1.1", - "metadata": { - "legacy_name": "Parse Data" - }, + "icon": "Azure", + "key": "AzureDocumentIntelligence", + "legacy": false, + "lf_version": "1.6.3", + "metadata": {}, "minimized": false, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Message", - "group_outputs": false, - "method": "parse_data", - "name": "text", - "selected": "Message", - "tool_mode": true, - "types": [ - "Message" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "Data List", + "display_name": "Structured Data", "group_outputs": false, - "method": "parse_data_as_list", - "name": "data_list", - "selected": "Data", + "method": "load_files", + "name": "structured_data", + "selected": "DataFrame", "tool_mode": true, "types": [ - "Data" + "DataFrame" ], "value": "__UNDEFINED__" } ], "pinned": false, + "priority": 3, + "score": 0.10489765225226892, "template": { "_type": "Component", "code": { @@ -246,126 +222,287 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import data_to_text, data_to_text_list\nfrom langflow.io import DataInput, MultilineInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.message import Message\n\n\nclass ParseDataComponent(Component):\n display_name = \"Data to Message\"\n description = \"Convert Data objects into Messages using any {field_name} from input data.\"\n icon = \"message-square\"\n name = \"ParseData\"\n legacy = True\n replacement = [\"processing.DataOperations\", \"processing.TypeConverterComponent\"]\n metadata = {\n \"legacy_name\": \"Parse Data\",\n }\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to text.\",\n is_list=True,\n required=True,\n ),\n MultilineInput(\n name=\"template\",\n display_name=\"Template\",\n info=\"The template to use for formatting the data. \"\n \"It can contain the keys {text}, {data} or any other key in the Data.\",\n value=\"{text}\",\n required=True,\n ),\n StrInput(name=\"sep\", display_name=\"Separator\", advanced=True, value=\"\\n\"),\n ]\n\n outputs = [\n Output(\n display_name=\"Message\",\n name=\"text\",\n info=\"Data as a single Message, with each input Data separated by Separator\",\n method=\"parse_data\",\n ),\n Output(\n display_name=\"Data List\",\n name=\"data_list\",\n info=\"Data as a list of new Data, each having `text` formatted by Template\",\n method=\"parse_data_as_list\",\n ),\n ]\n\n def _clean_args(self) -> tuple[list[Data], str, str]:\n data = self.data if isinstance(self.data, list) else [self.data]\n template = self.template\n sep = self.sep\n return data, template, sep\n\n def parse_data(self) -> Message:\n data, template, sep = self._clean_args()\n result_string = data_to_text(template, data, sep)\n self.status = result_string\n return Message(text=result_string)\n\n def parse_data_as_list(self) -> list[Data]:\n data, template, _ = self._clean_args()\n text_list, data_list = data_to_text_list(template, data)\n for item, text in zip(data_list, text_list, strict=True):\n item.set_text(text)\n self.status = data_list\n return data_list\n" + "value": "\"\"\"Azure Document Intelligence Component - Form recognition and document processing.\"\"\"\n\nimport asyncio\nimport concurrent.futures\nimport mimetypes\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom urllib.parse import unquote, urlparse\n\nimport aiohttp\nimport requests\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.io import BoolInput, DropdownInput, HandleInput, IntInput, Output\nfrom langflow.schema.data import Data\nfrom loguru import logger\n\n\nclass AzureDocumentIntelligenceComponent(BaseFileComponent):\n \"\"\"Component for Azure Document Intelligence - advanced document processing and form recognition.\"\"\"\n\n display_name: str = \"Azure Document Intelligence\"\n description: str = \"Process documents using Azure Document Intelligence (formerly Form Recognizer) for OCR, form extraction, and document analysis\"\n documentation: str = \"https://docs.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/\"\n icon: str = \"Azure\"\n name: str = \"AzureDocumentIntelligence\"\n category: str = \"models\"\n priority: int = 3 # High priority for document processing\n\n VALID_EXTENSIONS = [\"pdf\", \"jpg\", \"jpeg\", \"png\", \"bmp\", \"tiff\", \"tif\"]\n\n inputs = [\n HandleInput(\n name=\"url\",\n display_name=\"URL\",\n info=\"URL to the document to process\",\n input_types=[\"str\", \"Data\", \"Message\", \"list\"],\n required=False,\n ),\n # Include only the HandleInput and BoolInputs from base_inputs\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n DropdownInput(\n name=\"model_type\",\n display_name=\"Model Type\",\n options=[\"prebuilt-document\", \"prebuilt-read\", \"prebuilt-layout\"],\n value=\"prebuilt-document\",\n info=\"Choose the Form Recognizer model to use\",\n ),\n BoolInput(\n name=\"extract_tables\",\n display_name=\"Extract Tables\",\n value=True,\n info=\"Extract and format tables from the document\",\n ),\n BoolInput(\n name=\"include_confidence\",\n display_name=\"Include Confidence Scores\",\n value=False,\n advanced=True,\n info=\"Include confidence scores in the extracted text\",\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"Use Concurrent Processing\",\n value=True,\n info=\"Enable concurrent processing of multiple files\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"Number of files to process concurrently\",\n value=2,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Structured Data\", name=\"structured_data\", method=\"load_files\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n self._text_content = \"\"\n\n def get_text_content(self) -> str:\n \"\"\"Return the concatenated text content from all processed pages.\"\"\"\n return self._text_content\n\n def _extract_filename_from_url(self, url: str) -> str:\n \"\"\"Extract filename from URL or generate a default one.\"\"\"\n try:\n logger.debug(f\"Extracting filename from URL: {url}\")\n parsed_url = urlparse(url)\n path = unquote(parsed_url.path)\n filename = os.path.basename(path)\n\n if filename and \".\" in filename:\n logger.debug(f\"Found filename in URL path: {filename}\")\n return filename\n\n response = requests.head(url, allow_redirects=True)\n if \"content-disposition\" in response.headers:\n content_disp = response.headers[\"content-disposition\"]\n if \"filename=\" in content_disp:\n filename = content_disp.split(\"filename=\")[1].strip(\"\\\"'\")\n logger.debug(f\"Found filename in content-disposition: {filename}\")\n return filename\n\n if \"content-type\" in response.headers:\n ext = mimetypes.guess_extension(response.headers[\"content-type\"])\n if ext:\n filename = f\"downloaded{ext}\"\n logger.debug(f\"Generated filename from content-type: {filename}\")\n return filename\n\n logger.debug(\"Using default filename: downloaded.pdf\")\n return \"downloaded.pdf\"\n except Exception as e:\n logger.error(f\"Error extracting filename from URL: {e!s}\")\n return \"downloaded.pdf\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n logger.debug(f\"Attempting to download file from URL: {url}\")\n filename = self._extract_filename_from_url(url)\n local_path = os.path.join(self.temp_dir, filename)\n logger.debug(f\"Local path for download: {local_path}\")\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n def _extract_url_from_input(self, input_data) -> str | None:\n \"\"\"Extract URL string from various input types.\"\"\"\n logger.debug(f\"Extracting URL from input data type: {type(input_data)}\")\n\n # Handle list of Data objects (from blob storage)\n if isinstance(input_data, list):\n logger.debug(f\"Processing list input with {len(input_data)} items\")\n if input_data and isinstance(input_data[0], Data):\n url = input_data[0].data.get(\"file_path\")\n logger.debug(f\"Extracted URL from first Data object in list: {url}\")\n return url\n return None\n\n if isinstance(input_data, str):\n logger.debug(f\"Input is string: {input_data}\")\n return input_data\n elif isinstance(input_data, Data):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from Data object: {url}\")\n return url\n elif hasattr(input_data, \"text\"):\n logger.debug(f\"Extracted URL from text attribute: {input_data.text}\")\n return input_data.text\n elif hasattr(input_data, \"data\"):\n url = (\n input_data.data.get(\"file_path\")\n or input_data.data.get(\"url\")\n or input_data.text\n )\n logger.debug(f\"Extracted URL from data attribute: {url}\")\n return url\n logger.debug(\"No URL found in input data\")\n return None\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths.\"\"\"\n resolved_files = []\n logger.debug(\"Starting path validation and resolution\")\n\n # Handle URL input if provided\n if hasattr(self, \"url\") and self.url:\n try:\n logger.debug(f\"Processing URL input: {self.url}\")\n # Extract URL from different input types\n url = self._extract_url_from_input(self.url)\n if not url:\n logger.warning(\"No valid URL found in input\")\n return resolved_files\n\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(url)\n )\n finally:\n loop.close()\n\n if local_path:\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": url,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n except Exception as e:\n logger.error(f\"Error processing URL {url}: {e!s}\")\n if not self.silent_errors:\n raise\n\n # Handle file_path input\n file_path = self._file_path_as_list()\n logger.debug(f\"Processing file_path input: {file_path}\")\n for obj in file_path:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n logger.debug(f\"Processing server file path: {server_file_path}\")\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Check if it's a URL\n if isinstance(server_file_path, str) and server_file_path.startswith(\n (\"http://\", \"https://\")\n ):\n logger.debug(f\"Processing URL from file_path: {server_file_path}\")\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(server_file_path)\n )\n finally:\n loop.close()\n\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": server_file_path,\n }\n )\n logger.debug(\n f\"Created new Data object with local path: {local_path}\"\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(server_file_path)))\n logger.debug(f\"Resolved local file path: {resolved_path}\")\n if not resolved_path.exists():\n msg = f\"File not found: {server_file_path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {server_file_path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n logger.debug(f\"Resolved {len(resolved_files)} files\")\n return resolved_files\n\n async def process_file(\n self, file_path: str, *, silent_errors: bool = False\n ) -> tuple[Data, str]:\n \"\"\"Process a single file using the OCR service.\"\"\"\n try:\n from langflow.services.deps import get_document_intelligence_service\n \n # Create OCR service directly\n ocr_service = get_document_intelligence_service()\n\n with open(file_path, \"rb\") as file:\n file_content = file.read()\n\n extracted_content, plain_text = await ocr_service.process_document(\n file_content=file_content,\n model_type=self.model_type,\n include_confidence=self.include_confidence,\n extract_tables=self.extract_tables,\n )\n\n structured_data = Data(\n text=plain_text,\n data={\n self.SERVER_FILE_PATH_FIELDNAME: str(file_path),\n \"result\": extracted_content,\n },\n )\n\n return structured_data, plain_text\n\n except Exception as e:\n logger.error(f\"Error processing file {file_path}: {e!s}\")\n if not silent_errors:\n raise\n return None, \"\"\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process multiple files with concurrent processing.\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n concurrency = (\n 1\n if not self.use_multithreading\n else max(1, self.concurrency_multithreading)\n )\n file_count = len(file_list)\n\n logger.info(f\"Processing {file_count} files with concurrency: {concurrency}\")\n\n all_plain_text = []\n processed_data = []\n\n if concurrency > 1 and file_count > 1:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n with concurrent.futures.ThreadPoolExecutor(\n max_workers=concurrency\n ) as executor:\n future_to_file = {\n executor.submit(\n lambda path: loop.run_until_complete(\n self.process_file(\n str(path), silent_errors=self.silent_errors\n )\n ),\n file.path,\n ): file\n for file in file_list\n }\n for future in concurrent.futures.as_completed(future_to_file):\n try:\n structured_data, plain_text = future.result()\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error in concurrent processing: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n else:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n for file in file_list:\n try:\n structured_data, plain_text = loop.run_until_complete(\n self.process_file(\n str(file.path), silent_errors=self.silent_errors\n )\n )\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n\n # Store concatenated text content\n self._text_content = \"\\n\\n=== NEW DOCUMENT ===\\n\\n\".join(all_plain_text)\n\n return self.rollup_data(file_list, processed_data)\n\n def __del__(self):\n \"\"\"Cleanup temporary files and directory.\"\"\"\n try:\n if hasattr(self, \"temp_dir\") and os.path.exists(self.temp_dir):\n # Remove downloaded files\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.unlink(file_path)\n # Remove the temporary directory\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" + }, + "concurrency_multithreading": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Processing Concurrency", + "dynamic": false, + "info": "Number of files to process concurrently", + "list": false, + "list_add_label": "Add More", + "name": "concurrency_multithreading", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 2 + }, + "delete_server_file_after_processing": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Delete Server File After Processing", + "dynamic": false, + "info": "If true, the Server File Path will be deleted after processing.", + "list": false, + "list_add_label": "Add More", + "name": "delete_server_file_after_processing", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true }, - "data": { - "_input_type": "DataInput", + "extract_tables": { + "_input_type": "BoolInput", "advanced": false, - "display_name": "Data", + "display_name": "Extract Tables", + "dynamic": false, + "info": "Extract and format tables from the document", + "list": false, + "list_add_label": "Add More", + "name": "extract_tables", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "file_path": { + "_input_type": "HandleInput", + "advanced": true, + "display_name": "Server File Path", "dynamic": false, - "info": "The data to convert to text.", + "info": "Data object with a 'file_path' property pointing to server file or a Message object with a path to the file. Supercedes 'Path' but supports same file types.", "input_types": [ - "Data" + "Data", + "Message" ], "list": true, - "name": "data", + "list_add_label": "Add More", + "name": "file_path", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, "type": "other", "value": "" }, - "sep": { - "_input_type": "StrInput", + "ignore_unspecified_files": { + "_input_type": "BoolInput", "advanced": true, - "display_name": "Separator", + "display_name": "Ignore Unspecified Files", "dynamic": false, - "info": "", + "info": "If true, Data with no 'file_path' property will be ignored.", "list": false, - "load_from_db": false, - "name": "sep", + "list_add_label": "Add More", + "name": "ignore_unspecified_files", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "ignore_unsupported_extensions": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Ignore Unsupported Extensions", + "dynamic": false, + "info": "If true, files with unsupported extensions will not be processed.", + "list": false, + "list_add_label": "Add More", + "name": "ignore_unsupported_extensions", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "include_confidence": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Include Confidence Scores", + "dynamic": false, + "info": "Include confidence scores in the extracted text", + "list": false, + "list_add_label": "Add More", + "name": "include_confidence", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "model_type": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model Type", + "dynamic": false, + "external_options": {}, + "info": "Choose the Form Recognizer model to use", + "name": "model_type", + "options": [ + "prebuilt-document", + "prebuilt-read", + "prebuilt-layout" + ], + "options_metadata": [], "placeholder": "", "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "\n" + "value": "prebuilt-document" }, - "template": { - "_input_type": "MultilineInput", + "silent_errors": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Silent Errors", + "dynamic": false, + "info": "If true, errors will not raise an exception.", + "list": false, + "list_add_label": "Add More", + "name": "silent_errors", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + }, + "url": { + "_input_type": "HandleInput", "advanced": false, - "display_name": "Template", + "display_name": "URL", "dynamic": false, - "info": "The template to use for formatting the data. It can contain the keys {text}, {data} or any other key in the Data.", + "info": "URL to the document to process", "input_types": [ - "Message" + "str", + "Data", + "Message", + "list" ], "list": false, - "load_from_db": false, - "multiline": true, - "name": "template", + "list_add_label": "Add More", + "name": "url", "placeholder": "", - "required": true, + "required": false, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "use_multithreading": { + "_input_type": "BoolInput", + "advanced": false, + "display_name": "Use Concurrent Processing", + "dynamic": false, + "info": "Enable concurrent processing of multiple files", + "list": false, + "list_add_label": "Add More", + "name": "use_multithreading", + "placeholder": "", + "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "str", - "value": "{text}" + "type": "bool", + "value": true } }, "tool_mode": false }, "showNode": true, - "type": "ParseData" + "type": "AzureDocumentIntelligence" }, - "id": "ParseData-u3h2k", + "id": "AzureDocumentIntelligence-J3BuB", "measured": { - "height": 350, + "height": 365, "width": 320 }, "position": { - "x": 955.7887892952915, - "y": -156.21377260789296 + "x": 1277.2467548657498, + "y": 577.571523636203 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "ClinicalLLM-Q2Siz", + "id": "ParserComponent-SJrwu", "node": { "base_classes": [ - "Data" + "Message" ], "beta": false, + "category": "processing", "conditional_paths": [], "custom_fields": {}, - "description": "Extract clinical entities from text using Clinical LLM.", - "display_name": "Clinical LLM", - "documentation": "https://docs.example.com/clinical-llm", + "description": "Extracts text using a template.", + "display_name": "Parser", + "documentation": "https://docs.langflow.org/components-processing#parser", "edited": false, "field_order": [ - "search_query" + "input_data", + "mode", + "pattern", + "sep" ], "frozen": false, - "icon": "Autonomize", + "icon": "braces", + "key": "ParserComponent", "legacy": false, - "lf_version": "1.1.1", + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], "outputs": [ { + "allows_loop": false, "cache": true, - "display_name": "Clinical Entities", - "method": "build_output", - "name": "prediction", - "selected": "Data", + "display_name": "Parsed Text", + "group_outputs": false, + "method": "parse_combined_text", + "name": "parsed_text", + "selected": "Message", + "tool_mode": true, "types": [ - "Data" + "Message" ], "value": "__UNDEFINED__" } ], "pinned": false, + "score": 0.001, "template": { "_type": "Component", "code": { @@ -384,21 +521,88 @@ "show": true, "title_case": false, "type": "code", - "value": "from pydantic import BaseModel\n\nfrom langflow.base.modelhub import ATModelComponent\nfrom langflow.inputs.input_mixin import FieldTypes\nfrom langflow.io import MultilineInput, Output\nfrom langflow.schema import Data\nfrom langflow.services.modelhub.model_endpoint import ModelEndpoint\n\n\nclass Trait(BaseModel):\n Name: str\n Score: float\n\n\nclass Attribute(BaseModel):\n Id: int\n BeginOffset: int\n EndOffset: int\n Text: str\n Score: float\n Category: str\n Type: str\n Traits: list[Trait]\n\n\nclass Entity(BaseModel):\n Category: str\n Type: str\n Text: str\n BeginOffset: int\n EndOffset: int\n Score: float\n Traits: list[Trait]\n Id: int\n Attributes: list[Attribute] | None = None\n\n\nclass ClinicalPrediction(BaseModel):\n prediction: list[Entity]\n\n\nclass ClinicalLLMComponent(ATModelComponent):\n \"\"\"Component for the Clinical LLM model\"\"\"\n\n display_name: str = \"Clinical LLM\"\n description: str = \"Extract clinical entities from text using Clinical LLM.\"\n documentation: str = \"https://docs.example.com/clinical-llm\"\n icon: str = \"Autonomize\"\n name: str = \"ClinicalLLM\"\n _model_name = ModelEndpoint.CLINICAL_LLM\n\n inputs = [\n MultilineInput(\n name=\"search_query\",\n display_name=\"Search query\",\n field_type=FieldTypes.TEXT,\n multiline=True,\n )\n ]\n\n outputs = [\n Output(\n name=\"prediction\", display_name=\"Clinical Entities\", method=\"build_output\"\n ),\n ]\n\n async def extract_entities(self, text) -> ClinicalPrediction:\n \"\"\"Extract clinical entities from the input text\"\"\"\n try:\n response = await self.predict(text=text)\n return ClinicalPrediction(**response)\n except Exception as e:\n msg = f\"Error extracting clinical entities: {e!s}\"\n raise ValueError(msg) from e\n\n async def build_output(self) -> Data:\n \"\"\"Generate the output based on selected knowledgehub hubs.\"\"\"\n query_results = await self.extract_entities(self.search_query)\n data = Data(value={\"data\": query_results})\n self.status = data\n return data\n" + "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, HandleInput, MessageTextInput, MultilineInput, TabInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.template.field.base import Output\n\n\nclass ParserComponent(Component):\n display_name = \"Parser\"\n description = \"Extracts text using a template.\"\n documentation: str = \"https://docs.langflow.org/components-processing#parser\"\n icon = \"braces\"\n\n inputs = [\n HandleInput(\n name=\"input_data\",\n display_name=\"Data or DataFrame\",\n input_types=[\"DataFrame\", \"Data\"],\n info=\"Accepts either a DataFrame or a Data object.\",\n required=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"Parser\", \"Stringify\"],\n value=\"Parser\",\n info=\"Convert into raw string instead of using a template.\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"pattern\",\n display_name=\"Template\",\n info=(\n \"Use variables within curly brackets to extract column values for DataFrames \"\n \"or key values for Data.\"\n \"For example: `Name: {Name}, Age: {Age}, Country: {Country}`\"\n ),\n value=\"Text: {text}\", # Example default\n dynamic=True,\n show=True,\n required=True,\n ),\n MessageTextInput(\n name=\"sep\",\n display_name=\"Separator\",\n advanced=True,\n value=\"\\n\",\n info=\"String used to separate rows/items.\",\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Parsed Text\",\n name=\"parsed_text\",\n info=\"Formatted text output.\",\n method=\"parse_combined_text\",\n ),\n ]\n\n def update_build_config(self, build_config, field_value, field_name=None):\n \"\"\"Dynamically hide/show `template` and enforce requirement based on `stringify`.\"\"\"\n if field_name == \"mode\":\n build_config[\"pattern\"][\"show\"] = self.mode == \"Parser\"\n build_config[\"pattern\"][\"required\"] = self.mode == \"Parser\"\n if field_value:\n clean_data = BoolInput(\n name=\"clean_data\",\n display_name=\"Clean Data\",\n info=(\n \"Enable to clean the data by removing empty rows and lines \"\n \"in each cell of the DataFrame/ Data object.\"\n ),\n value=True,\n advanced=True,\n required=False,\n )\n build_config[\"clean_data\"] = clean_data.to_dict()\n else:\n build_config.pop(\"clean_data\", None)\n\n return build_config\n\n def _clean_args(self):\n \"\"\"Prepare arguments based on input type.\"\"\"\n input_data = self.input_data\n\n match input_data:\n case list() if all(isinstance(item, Data) for item in input_data):\n msg = \"List of Data objects is not supported.\"\n raise ValueError(msg)\n case DataFrame():\n return input_data, None\n case Data():\n return None, input_data\n case dict() if \"data\" in input_data:\n try:\n if \"columns\" in input_data: # Likely a DataFrame\n return DataFrame.from_dict(input_data), None\n # Likely a Data object\n return None, Data(**input_data)\n except (TypeError, ValueError, KeyError) as e:\n msg = f\"Invalid structured input provided: {e!s}\"\n raise ValueError(msg) from e\n case _:\n msg = f\"Unsupported input type: {type(input_data)}. Expected DataFrame or Data.\"\n raise ValueError(msg)\n\n def parse_combined_text(self) -> Message:\n \"\"\"Parse all rows/items into a single text or convert input to string if `stringify` is enabled.\"\"\"\n # Early return for stringify option\n if self.mode == \"Stringify\":\n return self.convert_to_string()\n\n df, data = self._clean_args()\n\n lines = []\n if df is not None:\n for _, row in df.iterrows():\n formatted_text = self.pattern.format(**row.to_dict())\n lines.append(formatted_text)\n elif data is not None:\n formatted_text = self.pattern.format(**data.data)\n lines.append(formatted_text)\n\n combined_text = self.sep.join(lines)\n self.status = combined_text\n return Message(text=combined_text)\n\n def convert_to_string(self) -> Message:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n result = \"\"\n if isinstance(self.input_data, list):\n result = \"\\n\".join([safe_convert(item, clean_data=self.clean_data or False) for item in self.input_data])\n else:\n result = safe_convert(self.input_data or False)\n self.log(f\"Converted to string with length: {len(result)}\")\n\n message = Message(text=result)\n self.status = message\n return message\n" }, - "search_query": { - "_input_type": "MultilineInput", + "input_data": { + "_input_type": "HandleInput", "advanced": false, - "display_name": "Search query", + "display_name": "Data or DataFrame", "dynamic": false, - "info": "", + "info": "Accepts either a DataFrame or a Data object.", + "input_types": [ + "DataFrame", + "Data" + ], + "list": false, + "list_add_label": "Add More", + "name": "input_data", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "trace_as_metadata": true, + "type": "other", + "value": "" + }, + "mode": { + "_input_type": "TabInput", + "advanced": false, + "display_name": "Mode", + "dynamic": false, + "info": "Convert into raw string instead of using a template.", + "name": "mode", + "options": [ + "Parser", + "Stringify" + ], + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "tab", + "value": "Parser" + }, + "pattern": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Template", + "dynamic": true, + "info": "Use variables within curly brackets to extract column values for DataFrames or key values for Data.For example: `Name: {Name}, Age: {Age}, Country: {Country}`", "input_types": [ "Message" ], "list": false, + "list_add_label": "Add More", "load_from_db": false, "multiline": true, - "name": "search_query", + "name": "pattern", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "{text}" + }, + "sep": { + "_input_type": "MessageTextInput", + "advanced": true, + "display_name": "Separator", + "dynamic": false, + "info": "String used to separate rows/items.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "sep", "placeholder": "", "required": false, "show": true, @@ -407,158 +611,259 @@ "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "" + "value": "\n" } }, "tool_mode": false }, "showNode": true, - "type": "ClinicalLLM" + "type": "ParserComponent" }, - "id": "ClinicalLLM-Q2Siz", + "id": "ParserComponent-SJrwu", "measured": { - "height": 254, + "height": 329, "width": 320 }, "position": { - "x": 1385.0103621193246, - "y": -195 + "x": 1641.4717455363013, + "y": 659.6202609077544 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "ParseData-gZwky", + "id": "Agent-d22BR", "node": { "base_classes": [ - "Data", "Message" ], "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Convert Data objects into Messages using any {field_name} from input data.", - "display_name": "Parse Data", - "documentation": "", + "description": "Define the agent's instructions, then enter a task to complete using tools.", + "display_name": "Agent", + "documentation": "https://docs.langflow.org/agents", "edited": false, "field_order": [ - "data", - "template", - "sep" + "agent_llm", + "max_tokens", + "model_kwargs", + "model_name", + "openai_api_base", + "api_key", + "temperature", + "seed", + "max_retries", + "timeout", + "system_prompt", + "n_messages", + "format_instructions", + "output_schema", + "tools", + "input_value", + "handle_parsing_errors", + "verbose", + "max_iterations", + "agent_description", + "add_current_date_tool" ], "frozen": false, - "icon": "message-square", - "legacy": true, - "lf_version": "1.1.1", - "metadata": { - "legacy_name": "Parse Data" - }, + "icon": "bot", + "last_updated": "2025-10-29T10:50:40.870Z", + "legacy": false, + "lf_version": "1.6.3", + "metadata": {}, "minimized": false, "output_types": [], "outputs": [ { "allows_loop": false, "cache": true, - "display_name": "Message", + "display_name": "Response", "group_outputs": false, - "method": "parse_data", - "name": "text", + "method": "message_response", + "name": "response", + "options": null, + "required_inputs": null, "selected": "Message", "tool_mode": true, "types": [ "Message" ], "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "Data List", - "group_outputs": false, - "method": "parse_data_as_list", - "name": "data_list", - "selected": "Data", - "tool_mode": true, - "types": [ - "Data" - ], - "value": "__UNDEFINED__" } ], "pinned": false, "template": { "_type": "Component", - "code": { + "add_current_date_tool": { + "_input_type": "BoolInput", "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", + "display_name": "Current Date", + "dynamic": false, + "info": "If true, will add a tool to the agent that returns the current date.", + "input_types": [], "list": false, + "list_add_label": "Add More", + "name": "add_current_date_tool", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "agent_description": { + "_input_type": "MultilineInput", + "advanced": true, + "copy_field": false, + "display_name": "Agent Description [Deprecated]", + "dynamic": false, + "info": "The description of the agent. This is only used when in Tool Mode. Defaults to 'A helpful assistant with access to the following tools:' and tools are added dynamically. This feature is deprecated and will be removed in future versions.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", "load_from_db": false, "multiline": true, - "name": "code", - "password": false, + "name": "agent_description", "placeholder": "", - "required": true, + "required": false, "show": true, "title_case": false, - "type": "code", - "value": "from langflow.custom.custom_component.component import Component\nfrom langflow.helpers.data import data_to_text, data_to_text_list\nfrom langflow.io import DataInput, MultilineInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.message import Message\n\n\nclass ParseDataComponent(Component):\n display_name = \"Data to Message\"\n description = \"Convert Data objects into Messages using any {field_name} from input data.\"\n icon = \"message-square\"\n name = \"ParseData\"\n legacy = True\n replacement = [\"processing.DataOperations\", \"processing.TypeConverterComponent\"]\n metadata = {\n \"legacy_name\": \"Parse Data\",\n }\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to text.\",\n is_list=True,\n required=True,\n ),\n MultilineInput(\n name=\"template\",\n display_name=\"Template\",\n info=\"The template to use for formatting the data. \"\n \"It can contain the keys {text}, {data} or any other key in the Data.\",\n value=\"{text}\",\n required=True,\n ),\n StrInput(name=\"sep\", display_name=\"Separator\", advanced=True, value=\"\\n\"),\n ]\n\n outputs = [\n Output(\n display_name=\"Message\",\n name=\"text\",\n info=\"Data as a single Message, with each input Data separated by Separator\",\n method=\"parse_data\",\n ),\n Output(\n display_name=\"Data List\",\n name=\"data_list\",\n info=\"Data as a list of new Data, each having `text` formatted by Template\",\n method=\"parse_data_as_list\",\n ),\n ]\n\n def _clean_args(self) -> tuple[list[Data], str, str]:\n data = self.data if isinstance(self.data, list) else [self.data]\n template = self.template\n sep = self.sep\n return data, template, sep\n\n def parse_data(self) -> Message:\n data, template, sep = self._clean_args()\n result_string = data_to_text(template, data, sep)\n self.status = result_string\n return Message(text=result_string)\n\n def parse_data_as_list(self) -> list[Data]:\n data, template, _ = self._clean_args()\n text_list, data_list = data_to_text_list(template, data)\n for item, text in zip(data_list, text_list, strict=True):\n item.set_text(text)\n self.status = data_list\n return data_list\n" + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "A helpful assistant with access to the following tools:" }, - "data": { - "_input_type": "DataInput", + "agent_llm": { + "_input_type": "DropdownInput", "advanced": false, - "display_name": "Data", + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model Provider", "dynamic": false, - "info": "The data to convert to text.", - "input_types": [ - "Data" + "external_options": { + "fields": { + "data": { + "node": { + "display_name": "Connect other models", + "icon": "CornerDownLeft", + "name": "connect_other_models" + } + } + } + }, + "info": "The provider of the language model that the agent will use to generate responses.", + "input_types": [], + "name": "agent_llm", + "options": [ + "Anthropic", + "Google Generative AI", + "OpenAI", + "Azure OpenAI" + ], + "options_metadata": [ + { + "icon": "Anthropic" + }, + { + "icon": "GoogleGenerativeAI" + }, + { + "icon": "OpenAI" + }, + { + "icon": "Azure" + }, + { + "icon": "brain" + } ], - "list": true, - "name": "data", "placeholder": "", - "required": true, + "real_time_refresh": true, + "refresh_button": false, + "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "other", + "type": "str", + "value": "Azure OpenAI" + }, + "api_key": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Azure Chat OpenAI API Key", + "dynamic": false, + "info": "", + "input_types": [], + "load_from_db": true, + "name": "api_key", + "password": true, + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "title_case": false, + "type": "str", "value": "" }, - "sep": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Separator", + "api_version": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "API Version", "dynamic": false, + "external_options": {}, "info": "", - "list": false, - "load_from_db": false, - "name": "sep", + "input_types": [], + "name": "api_version", + "options": [ + "2025-02-01-preview", + "2025-01-01-preview", + "2024-12-01-preview", + "2024-10-01-preview", + "2024-09-01-preview", + "2024-08-01-preview", + "2024-07-01-preview", + "2024-06-01", + "2024-03-01-preview", + "2024-02-15-preview", + "2023-12-01-preview", + "2023-05-15" + ], + "options_metadata": [], "placeholder": "", "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "\n" + "value": "2025-01-01-preview" }, - "template": { - "_input_type": "MultilineInput", + "azure_deployment": { + "_input_type": "MessageTextInput", "advanced": false, - "display_name": "Template", + "display_name": "Deployment Name", "dynamic": false, - "info": "The template to use for formatting the data. It can contain the keys {text}, {data} or any other key in the Data.", + "info": "", "input_types": [ "Message" ], "list": false, + "list_add_label": "Add More", "load_from_db": false, - "multiline": true, - "name": "template", + "name": "azure_deployment", "placeholder": "", "required": true, "show": true, @@ -567,76 +872,38 @@ "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "{value}" - } - }, - "tool_mode": false - }, - "showNode": true, - "type": "ParseData" - }, - "id": "ParseData-gZwky", - "measured": { - "height": 350, - "width": 320 - }, - "position": { - "x": 1755.14363405295, - "y": -59.499416685424876 - }, - "selected": false, - "type": "genericNode" - }, - { - "data": { - "id": "JSONOutput-Ru3z5", - "node": { - "base_classes": [ - "Message" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Display input data as JSON in the Playground.", - "display_name": "JSON Output", - "documentation": "", - "edited": false, - "field_order": [ - "data", - "pretty_print" - ], - "frozen": false, - "icon": "Braces", - "legacy": false, - "lf_version": "1.1.1", - "metadata": {}, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "JSON", - "group_outputs": false, - "method": "json_response", - "name": "json", - "selected": "Message", - "tool_mode": true, - "types": [ + "value": "gpt-4o" + }, + "azure_endpoint": { + "_input_type": "MessageTextInput", + "advanced": false, + "display_name": "Azure Endpoint", + "dynamic": false, + "info": "Your Azure endpoint, including the resource. Example: `https://example-resource.azure.openai.com/`", + "input_types": [ "Message" ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "template": { - "_type": "Component", + "list": false, + "list_add_label": "Add More", + "load_from_db": true, + "name": "azure_endpoint", + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "azure_endpoint_gpt-4o" + }, "code": { "advanced": true, "dynamic": true, "fileTypes": [], "file_path": "", "info": "", + "input_types": [], "list": false, "load_from_db": false, "multiline": true, @@ -647,37 +914,301 @@ "show": true, "title_case": false, "type": "code", - "value": "import json\n\nfrom langflow.base.io.text import TextComponent\nfrom langflow.inputs import DataInput\nfrom langflow.io import BoolInput, Output\nfrom langflow.schema import Data\nfrom langflow.schema.message import Message\n\n\nclass JSONOutputComponent(TextComponent):\n display_name = \"JSON Output\"\n description = \"Display input data as JSON in the Playground.\"\n icon = \"Braces\"\n name = \"JSONOutput\"\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to JSON.\",\n is_list=True,\n ),\n BoolInput(\n name=\"pretty_print\",\n display_name=\"Pretty Print\",\n info=\"Format JSON with proper indentation\",\n value=True,\n advanced=True,\n ),\n ]\n outputs = [\n Output(display_name=\"JSON\", name=\"json\", method=\"json_response\"),\n ]\n\n def _process_data(self, data: Data | list[Data]) -> dict | list:\n \"\"\"Convert Data object(s) to dictionary/list format.\"\"\"\n if isinstance(data, list):\n return [item.dict() for item in data]\n return data.dict()\n\n def json_response(self) -> Message:\n try:\n # Process the Data input\n processed_data = self._process_data(self.data)\n\n # Convert to JSON string with optional pretty printing\n if self.pretty_print:\n formatted_json = json.dumps(\n processed_data, indent=2, ensure_ascii=False\n )\n else:\n formatted_json = json.dumps(processed_data, ensure_ascii=False)\n\n message = Message(text=formatted_json)\n self.status = formatted_json\n return message\n\n except Exception as e:\n error_message = f\"Error processing data to JSON: {e!s}\"\n message = Message(text=error_message)\n self.status = error_message\n return message\n" + "value": "import json\nimport re\n\nfrom langchain_core.tools import StructuredTool\nfrom pydantic import ValidationError\n\nfrom langflow.base.agents.agent import LCToolsAgentComponent\nfrom langflow.base.agents.events import ExceptionWithMessageError\nfrom langflow.base.models.model_input_constants import (\n ALL_PROVIDER_FIELDS,\n MODEL_DYNAMIC_UPDATE_FIELDS,\n MODEL_PROVIDERS_DICT,\n MODELS_METADATA,\n)\nfrom langflow.base.models.model_utils import get_model_name\nfrom langflow.components.helpers.current_date import CurrentDateComponent\nfrom langflow.components.helpers.memory import MemoryComponent\nfrom langflow.components.langchain_utilities.tool_calling import (\n ToolCallingAgentComponent,\n)\nfrom langflow.custom.custom_component.component import _get_component_toolkit\nfrom langflow.custom.utils import update_component_build_config\nfrom langflow.field_typing import Tool\nfrom langflow.helpers.base_model import build_model_from_schema\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n IntInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.schema.message import Message\nfrom langflow.schema.table import EditMode\nfrom langflow.custom.default_providers import apply_provider_defaults\n\n\ndef set_advanced_true(component_input):\n component_input.advanced = True\n return component_input\n\n\nMODEL_PROVIDERS_LIST = [\"Anthropic\", \"Google Generative AI\", \"OpenAI\", \"Azure OpenAI\"]\n\n\nclass AgentComponent(ToolCallingAgentComponent):\n display_name: str = \"Agent\"\n description: str = \"Define the agent's instructions, then enter a task to complete using tools.\"\n documentation: str = \"https://docs.langflow.org/agents\"\n icon = \"bot\"\n beta = False\n name = \"Agent\"\n\n memory_inputs = [set_advanced_true(component_input) for component_input in MemoryComponent().inputs]\n\n # Filter out json_mode from OpenAI inputs since we handle structured output differently\n if \"OpenAI\" in MODEL_PROVIDERS_DICT:\n openai_inputs_filtered = [\n input_field\n for input_field in MODEL_PROVIDERS_DICT[\"OpenAI\"][\"inputs\"]\n if not (hasattr(input_field, \"name\") and input_field.name == \"json_mode\")\n ]\n else:\n openai_inputs_filtered = []\n\n inputs = [\n DropdownInput(\n name=\"agent_llm\",\n display_name=\"Model Provider\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST],\n value=\"OpenAI\",\n real_time_refresh=True,\n refresh_button=False,\n input_types=[],\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST if key in MODELS_METADATA]\n + [{\"icon\": \"brain\"}],\n external_options={\n \"fields\": {\n \"data\": {\n \"node\": {\n \"name\": \"connect_other_models\",\n \"display_name\": \"Connect other models\",\n \"icon\": \"CornerDownLeft\",\n }\n }\n },\n },\n ),\n *openai_inputs_filtered,\n MultilineInput(\n name=\"system_prompt\",\n display_name=\"Agent Instructions\",\n info=\"System Prompt: Initial instructions and context provided to guide the agent's behavior.\",\n value=\"You are a helpful assistant that can use tools to answer questions and perform tasks.\",\n advanced=False,\n ),\n IntInput(\n name=\"n_messages\",\n display_name=\"Number of Chat History Messages\",\n value=100,\n info=\"Number of chat history messages to retrieve.\",\n advanced=True,\n show=True,\n ),\n MultilineInput(\n name=\"format_instructions\",\n display_name=\"Output Format Instructions\",\n info=\"Generic Template for structured output formatting. Valid only with Structured response.\",\n value=(\n \"You are an AI that extracts structured JSON objects from unstructured text. \"\n \"Use a predefined schema with expected types (str, int, float, bool, dict). \"\n \"Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. \"\n \"Fill missing or ambiguous values with defaults: null for missing values. \"\n \"Remove exact duplicates but keep variations that have different field values. \"\n \"Always return valid JSON in the expected format, never throw errors. \"\n \"If multiple objects can be extracted, return them all in the structured format.\"\n ),\n advanced=True,\n ),\n TableInput(\n name=\"output_schema\",\n display_name=\"Output Schema\",\n info=(\n \"Schema Validation: Define the structure and data types for structured output. \"\n \"No validation if no output schema.\"\n ),\n advanced=True,\n required=False,\n value=[],\n table_schema=[\n {\n \"name\": \"name\",\n \"display_name\": \"Name\",\n \"type\": \"str\",\n \"description\": \"Specify the name of the output field.\",\n \"default\": \"field\",\n \"edit_mode\": EditMode.INLINE,\n },\n {\n \"name\": \"description\",\n \"display_name\": \"Description\",\n \"type\": \"str\",\n \"description\": \"Describe the purpose of the output field.\",\n \"default\": \"description of field\",\n \"edit_mode\": EditMode.POPOVER,\n },\n {\n \"name\": \"type\",\n \"display_name\": \"Type\",\n \"type\": \"str\",\n \"edit_mode\": EditMode.INLINE,\n \"description\": (\"Indicate the data type of the output field (e.g., str, int, float, bool, dict).\"),\n \"options\": [\"str\", \"int\", \"float\", \"bool\", \"dict\"],\n \"default\": \"str\",\n },\n {\n \"name\": \"multiple\",\n \"display_name\": \"As List\",\n \"type\": \"boolean\",\n \"description\": \"Set to True if this output field should be a list of the specified type.\",\n \"default\": \"False\",\n \"edit_mode\": EditMode.INLINE,\n },\n ],\n ),\n *LCToolsAgentComponent._base_inputs,\n # removed memory inputs from agent component\n # *memory_inputs,\n BoolInput(\n name=\"add_current_date_tool\",\n display_name=\"Current Date\",\n advanced=True,\n info=\"If true, will add a tool to the agent that returns the current date.\",\n value=True,\n ),\n ]\n outputs = [\n Output(name=\"response\", display_name=\"Response\", method=\"message_response\"),\n ]\n\n async def get_agent_requirements(self):\n \"\"\"Get the agent requirements for the agent.\"\"\"\n llm_model, display_name = await self.get_llm()\n if llm_model is None:\n msg = \"No language model selected. Please choose a model to proceed.\"\n raise ValueError(msg)\n self.model_name = get_model_name(llm_model, display_name=display_name)\n\n # Get memory data\n self.chat_history = await self.get_memory_data()\n if isinstance(self.chat_history, Message):\n self.chat_history = [self.chat_history]\n\n # Add current date tool if enabled\n if self.add_current_date_tool:\n if not isinstance(self.tools, list): # type: ignore[has-type]\n self.tools = []\n current_date_tool = (await CurrentDateComponent(**self.get_base_args()).to_toolkit()).pop(0)\n if not isinstance(current_date_tool, StructuredTool):\n msg = \"CurrentDateComponent must be converted to a StructuredTool\"\n raise TypeError(msg)\n self.tools.append(current_date_tool)\n return llm_model, self.chat_history, self.tools\n\n async def message_response(self) -> Message:\n try:\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n # Set up and run agent\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=self.system_prompt,\n )\n agent = self.create_agent_runnable()\n result = await self.run_agent(agent)\n\n # Store result for potential JSON output\n self._agent_result = result\n\n except (ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"{type(e).__name__}: {e!s}\")\n raise\n except ExceptionWithMessageError as e:\n await logger.aerror(f\"ExceptionWithMessageError occurred: {e}\")\n raise\n # Avoid catching blind Exception; let truly unexpected exceptions propagate\n except Exception as e:\n await logger.aerror(f\"Unexpected error: {e!s}\")\n raise\n else:\n return result\n\n def _preprocess_schema(self, schema):\n \"\"\"Preprocess schema to ensure correct data types for build_model_from_schema.\"\"\"\n processed_schema = []\n for field in schema:\n processed_field = {\n \"name\": str(field.get(\"name\", \"field\")),\n \"type\": str(field.get(\"type\", \"str\")),\n \"description\": str(field.get(\"description\", \"\")),\n \"multiple\": field.get(\"multiple\", False),\n }\n # Ensure multiple is handled correctly\n if isinstance(processed_field[\"multiple\"], str):\n processed_field[\"multiple\"] = processed_field[\"multiple\"].lower() in [\n \"true\",\n \"1\",\n \"t\",\n \"y\",\n \"yes\",\n ]\n processed_schema.append(processed_field)\n return processed_schema\n\n async def build_structured_output_base(self, content: str):\n \"\"\"Build structured output with optional BaseModel validation.\"\"\"\n json_pattern = r\"\\{.*\\}\"\n schema_error_msg = \"Try setting an output schema\"\n\n # Try to parse content as JSON first\n json_data = None\n try:\n json_data = json.loads(content)\n except json.JSONDecodeError:\n json_match = re.search(json_pattern, content, re.DOTALL)\n if json_match:\n try:\n json_data = json.loads(json_match.group())\n except json.JSONDecodeError:\n return {\"content\": content, \"error\": schema_error_msg}\n else:\n return {\"content\": content, \"error\": schema_error_msg}\n\n # If no output schema provided, return parsed JSON without validation\n if not hasattr(self, \"output_schema\") or not self.output_schema or len(self.output_schema) == 0:\n return json_data\n\n # Use BaseModel validation with schema\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n\n # Validate against the schema\n if isinstance(json_data, list):\n # Multiple objects\n validated_objects = []\n for item in json_data:\n try:\n validated_obj = output_model.model_validate(item)\n validated_objects.append(validated_obj.model_dump())\n except ValidationError as e:\n await logger.aerror(f\"Validation error for item: {e}\")\n # Include invalid items with error info\n validated_objects.append({\"data\": item, \"validation_error\": str(e)})\n return validated_objects\n\n # Single object\n try:\n validated_obj = output_model.model_validate(json_data)\n return [validated_obj.model_dump()] # Return as list for consistency\n except ValidationError as e:\n await logger.aerror(f\"Validation error: {e}\")\n return [{\"data\": json_data, \"validation_error\": str(e)}]\n\n except (TypeError, ValueError) as e:\n await logger.aerror(f\"Error building structured output: {e}\")\n # Fallback to parsed JSON without validation\n return json_data\n\n async def json_response(self) -> Data:\n \"\"\"Convert agent response to structured JSON Data output with schema validation.\"\"\"\n # Always use structured chat agent for JSON response mode for better JSON formatting\n try:\n system_components = []\n\n # 1. Agent Instructions (system_prompt)\n agent_instructions = getattr(self, \"system_prompt\", \"\") or \"\"\n if agent_instructions:\n system_components.append(f\"{agent_instructions}\")\n\n # 2. Format Instructions\n format_instructions = getattr(self, \"format_instructions\", \"\") or \"\"\n if format_instructions:\n system_components.append(f\"Format instructions: {format_instructions}\")\n\n # 3. Schema Information from BaseModel\n if hasattr(self, \"output_schema\") and self.output_schema and len(self.output_schema) > 0:\n try:\n processed_schema = self._preprocess_schema(self.output_schema)\n output_model = build_model_from_schema(processed_schema)\n schema_dict = output_model.model_json_schema()\n schema_info = (\n \"You are given some text that may include format instructions, \"\n \"explanations, or other content alongside a JSON schema.\\n\\n\"\n \"Your task:\\n\"\n \"- Extract only the JSON schema.\\n\"\n \"- Return it as valid JSON.\\n\"\n \"- Do not include format instructions, explanations, or extra text.\\n\\n\"\n \"Input:\\n\"\n f\"{json.dumps(schema_dict, indent=2)}\\n\\n\"\n \"Output (only JSON schema):\"\n )\n system_components.append(schema_info)\n except (ValidationError, ValueError, TypeError, KeyError) as e:\n await logger.aerror(f\"Could not build schema for prompt: {e}\", exc_info=True)\n\n # Combine all components\n combined_instructions = \"\\n\\n\".join(system_components) if system_components else \"\"\n llm_model, self.chat_history, self.tools = await self.get_agent_requirements()\n self.set(\n llm=llm_model,\n tools=self.tools or [],\n chat_history=self.chat_history,\n input_value=self.input_value,\n system_prompt=combined_instructions,\n )\n\n # Create and run structured chat agent\n try:\n structured_agent = self.create_agent_runnable()\n except (NotImplementedError, ValueError, TypeError) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n raise\n try:\n result = await self.run_agent(structured_agent)\n except (\n ExceptionWithMessageError,\n ValueError,\n TypeError,\n RuntimeError,\n ) as e:\n await logger.aerror(f\"Error with structured agent result: {e}\")\n raise\n # Extract content from structured agent result\n if hasattr(result, \"content\"):\n content = result.content\n elif hasattr(result, \"text\"):\n content = result.text\n else:\n content = str(result)\n\n except (\n ExceptionWithMessageError,\n ValueError,\n TypeError,\n NotImplementedError,\n AttributeError,\n ) as e:\n await logger.aerror(f\"Error with structured chat agent: {e}\")\n # Fallback to regular agent\n content_str = \"No content returned from agent\"\n return Data(data={\"content\": content_str, \"error\": str(e)})\n\n # Process with structured output validation\n try:\n structured_output = await self.build_structured_output_base(content)\n\n # Handle different output formats\n if isinstance(structured_output, list) and structured_output:\n if len(structured_output) == 1:\n return Data(data=structured_output[0])\n return Data(data={\"results\": structured_output})\n if isinstance(structured_output, dict):\n return Data(data=structured_output)\n return Data(data={\"content\": content})\n\n except (ValueError, TypeError) as e:\n await logger.aerror(f\"Error in structured output processing: {e}\")\n return Data(data={\"content\": content, \"error\": str(e)})\n\n async def get_memory_data(self):\n # TODO: This is a temporary fix to avoid message duplication. We should develop a function for this.\n messages = (\n await MemoryComponent(**self.get_base_args())\n .set(\n session_id=self.graph.session_id,\n order=\"Ascending\",\n n_messages=self.n_messages,\n )\n .retrieve_messages()\n )\n return [\n message for message in messages if getattr(message, \"id\", None) != getattr(self.input_value, \"id\", None)\n ]\n\n async def get_llm(self):\n if not isinstance(self.agent_llm, str):\n return self.agent_llm, None\n\n try:\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if not provider_info:\n msg = f\"Invalid model provider: {self.agent_llm}\"\n raise ValueError(msg)\n\n component_class = provider_info.get(\"component_class\")\n display_name = component_class.display_name\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\", \"\")\n\n return self._build_llm_model(component_class, inputs, prefix), display_name\n\n except (AttributeError, ValueError, TypeError, RuntimeError) as e:\n await logger.aerror(f\"Error building {self.agent_llm} language model: {e!s}\")\n msg = f\"Failed to initialize language model: {e!s}\"\n raise ValueError(msg) from e\n\n def _build_llm_model(self, component, inputs, prefix=\"\"):\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n return component.set(**model_kwargs).build_model()\n\n def set_component_params(self, component):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n inputs = provider_info.get(\"inputs\")\n prefix = provider_info.get(\"prefix\")\n # Filter out json_mode and only use attributes that exist on this component\n model_kwargs = {}\n for input_ in inputs:\n if hasattr(self, f\"{prefix}{input_.name}\"):\n model_kwargs[input_.name] = getattr(self, f\"{prefix}{input_.name}\")\n\n return component.set(**model_kwargs)\n return component\n\n def delete_fields(self, build_config: dotdict, fields: dict | list[str]) -> None:\n \"\"\"Delete specified fields from build_config.\"\"\"\n for field in fields:\n build_config.pop(field, None)\n\n def update_input_types(self, build_config: dotdict) -> dotdict:\n \"\"\"Update input types for all fields in build_config.\"\"\"\n for key, value in build_config.items():\n if isinstance(value, dict):\n if value.get(\"input_types\") is None:\n build_config[key][\"input_types\"] = []\n elif hasattr(value, \"input_types\") and value.input_types is None:\n value.input_types = []\n return build_config\n\n\n async def update_build_config(\n self, build_config: dotdict, field_value: str, field_name: str | None = None\n ) -> dotdict:\n # Existing logic for updating build_config\n if field_name in (\"agent_llm\",):\n build_config[\"agent_llm\"][\"value\"] = field_value\n provider_info = MODEL_PROVIDERS_DICT.get(field_value)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call the component class's update_build_config method\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n\n provider_configs: dict[str, tuple[dict, list[dict]]] = {\n provider: (\n MODEL_PROVIDERS_DICT[provider][\"fields\"],\n [\n MODEL_PROVIDERS_DICT[other_provider][\"fields\"]\n for other_provider in MODEL_PROVIDERS_DICT\n if other_provider != provider\n ],\n )\n for provider in MODEL_PROVIDERS_DICT\n }\n \n if field_value in provider_configs:\n fields_to_add, fields_to_delete = provider_configs[field_value]\n\n # Delete fields from other providers\n for fields in fields_to_delete:\n self.delete_fields(build_config, fields)\n\n # Add provider-specific fields\n build_config.update(fields_to_add)\n \n # Apply provider-specific defaults (only for Azure OpenAI currently)\n if field_value == \"Azure OpenAI\":\n build_config = apply_provider_defaults(field_value, build_config)\n \n # Reset input types for agent_llm\n build_config[\"agent_llm\"][\"input_types\"] = []\n build_config[\"agent_llm\"][\"display_name\"] = \"Model Provider\"\n \n elif field_value == \"connect_other_models\":\n # Delete all provider fields\n self.delete_fields(build_config, ALL_PROVIDER_FIELDS)\n # Update with custom component\n custom_component = DropdownInput(\n name=\"agent_llm\",\n display_name=\"Language Model\",\n info=\"The provider of the language model that the agent will use to generate responses.\",\n options=[*MODEL_PROVIDERS_LIST],\n real_time_refresh=True,\n refresh_button=False,\n input_types=[\"LanguageModel\"],\n placeholder=\"Awaiting model input.\",\n options_metadata=[MODELS_METADATA[key] for key in MODEL_PROVIDERS_LIST if key in MODELS_METADATA],\n external_options={\n \"fields\": {\n \"data\": {\n \"node\": {\n \"name\": \"connect_other_models\",\n \"display_name\": \"Connect other models\",\n \"icon\": \"CornerDownLeft\",\n },\n }\n },\n },\n )\n build_config.update({\"agent_llm\": custom_component.to_dict()})\n \n # Update input types for all fields\n build_config = self.update_input_types(build_config)\n\n # Validate required keys\n default_keys = [\n \"code\",\n \"_type\",\n \"agent_llm\",\n \"tools\",\n \"input_value\",\n \"add_current_date_tool\",\n \"system_prompt\",\n \"agent_description\",\n \"max_iterations\",\n \"handle_parsing_errors\",\n \"verbose\",\n ]\n missing_keys = [key for key in default_keys if key not in build_config]\n if missing_keys:\n msg = f\"Missing required keys in build_config: {missing_keys}\"\n raise ValueError(msg)\n \n # Rest of your existing method remains unchanged...\n if (\n isinstance(self.agent_llm, str)\n and self.agent_llm in MODEL_PROVIDERS_DICT\n and field_name in MODEL_DYNAMIC_UPDATE_FIELDS\n ):\n provider_info = MODEL_PROVIDERS_DICT.get(self.agent_llm)\n if provider_info:\n component_class = provider_info.get(\"component_class\")\n component_class = self.set_component_params(component_class)\n prefix = provider_info.get(\"prefix\")\n if component_class and hasattr(component_class, \"update_build_config\"):\n # Call each component class's update_build_config method\n # remove the prefix from the field_name\n if isinstance(field_name, str) and isinstance(prefix, str):\n field_name = field_name.replace(prefix, \"\")\n build_config = await update_component_build_config(\n component_class, build_config, field_value, \"model_name\"\n )\n return dotdict({k: v.to_dict() if hasattr(v, \"to_dict\") else v for k, v in build_config.items()})\n\n async def _get_tools(self) -> list[Tool]:\n component_toolkit = _get_component_toolkit()\n tools_names = self._build_tools_names()\n agent_description = self.get_tool_description()\n # TODO: Agent Description Depreciated Feature to be removed\n description = f\"{agent_description}{tools_names}\"\n tools = component_toolkit(component=self).get_tools(\n tool_name=\"Call_Agent\",\n tool_description=description,\n callbacks=self.get_langchain_callbacks(),\n )\n if hasattr(self, \"tools_metadata\"):\n tools = component_toolkit(component=self, metadata=self.tools_metadata).update_tools_metadata(tools=tools)\n return tools\n" + }, + "format_instructions": { + "_input_type": "MultilineInput", + "advanced": true, + "copy_field": false, + "display_name": "Output Format Instructions", + "dynamic": false, + "info": "Generic Template for structured output formatting. Valid only with Structured response.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "format_instructions", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "You are an AI that extracts structured JSON objects from unstructured text. Use a predefined schema with expected types (str, int, float, bool, dict). Extract ALL relevant instances that match the schema - if multiple patterns exist, capture them all. Fill missing or ambiguous values with defaults: null for missing values. Remove exact duplicates but keep variations that have different field values. Always return valid JSON in the expected format, never throw errors. If multiple objects can be extracted, return them all in the structured format." + }, + "handle_parsing_errors": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Handle Parse Errors", + "dynamic": false, + "info": "Should the Agent fix errors when reading user input for better processing?", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "handle_parsing_errors", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": true + }, + "input_value": { + "_input_type": "MessageInput", + "advanced": false, + "display_name": "Input", + "dynamic": false, + "info": "The input provided by the user for the agent to process.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "input_value", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": true, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "max_iterations": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Iterations", + "dynamic": false, + "info": "The maximum number of attempts the agent can make to complete its task before it stops.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "max_iterations", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 15 + }, + "max_tokens": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Max Tokens", + "dynamic": false, + "info": "The maximum number of tokens to generate. Set to 0 for unlimited tokens.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "max_tokens", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 0 + }, + "n_messages": { + "_input_type": "IntInput", + "advanced": true, + "display_name": "Number of Chat History Messages", + "dynamic": false, + "info": "Number of chat history messages to retrieve.", + "input_types": [], + "list": false, + "list_add_label": "Add More", + "name": "n_messages", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "int", + "value": 100 + }, + "output_schema": { + "_input_type": "TableInput", + "advanced": true, + "display_name": "Output Schema", + "dynamic": false, + "info": "Schema Validation: Define the structure and data types for structured output. No validation if no output schema.", + "input_types": [], + "is_list": true, + "list_add_label": "Add More", + "name": "output_schema", + "placeholder": "", + "required": false, + "show": true, + "table_icon": "Table", + "table_schema": { + "columns": [ + { + "default": "field", + "description": "Specify the name of the output field.", + "disable_edit": false, + "display_name": "Name", + "edit_mode": "inline", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "name", + "sortable": true, + "type": "str" + }, + { + "default": "description of field", + "description": "Describe the purpose of the output field.", + "disable_edit": false, + "display_name": "Description", + "edit_mode": "popover", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "description", + "sortable": true, + "type": "str" + }, + { + "default": "str", + "description": "Indicate the data type of the output field (e.g., str, int, float, bool, dict).", + "disable_edit": false, + "display_name": "Type", + "edit_mode": "inline", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "type", + "options": [ + "str", + "int", + "float", + "bool", + "dict" + ], + "sortable": true, + "type": "str" + }, + { + "default": false, + "description": "Set to True if this output field should be a list of the specified type.", + "disable_edit": false, + "display_name": "As List", + "edit_mode": "inline", + "filterable": true, + "formatter": "boolean", + "hidden": false, + "name": "multiple", + "sortable": true, + "type": "boolean" + } + ] + }, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "trigger_icon": "Table", + "trigger_text": "Open table", + "type": "table", + "value": [] + }, + "system_prompt": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "Agent Instructions", + "dynamic": false, + "info": "System Prompt: Initial instructions and context provided to guide the agent's behavior.", + "input_types": [ + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "system_prompt", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "You are a clinical data specialist. Extract clinical entities with relationships from medical documents.\n\n## TASK\n\nWhen you receive medical text:\n1. Call the clinical entity extraction tool with the raw text\n2. Filter the prediction array to keep ONLY entities that have `Attributes`\n3. Report the filtered entities\n\n## FILTERING RULE\n\nFrom the prediction array, keep only entities where:\n- `Attributes` field exists AND is not empty (length > 0)\n```python\nfiltered = [e for e in predictions if e.get(\"Attributes\") and len(e[\"Attributes\"]) > 0]\n```\n\n## OUTPUT FORMAT\n\n**Entities with Attributes:**\n- Total entities extracted: [count]\n- Entities with attributes: [filtered count]\n\n**Results:**\nFor each entity:\n- Category: [Category]\n- Type: [Type]\n- Text: [Text]\n- Attributes: [list each attribute with its Type and Text]\n\n## RULES\n\n1. ALWAYS call the entity extraction tool first\n2. ONLY report entities that have Attributes\n3. DO NOT include entities with empty or missing Attributes\n4. Report exactly what the tool returns\n" + }, + "temperature": { + "_input_type": "SliderInput", + "advanced": true, + "display_name": "Temperature", + "dynamic": false, + "info": "Controls randomness. Lower values are more deterministic, higher values are more creative.", + "input_types": [], + "max_label": "", + "max_label_icon": "", + "min_label": "", + "min_label_icon": "", + "name": "temperature", + "placeholder": "", + "range_spec": { + "max": 2, + "min": 0, + "step": 0.01, + "step_type": "float" + }, + "required": false, + "show": true, + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "title_case": false, + "tool_mode": false, + "type": "slider", + "value": 0.08 }, - "data": { - "_input_type": "DataInput", + "tools": { + "_input_type": "HandleInput", "advanced": false, - "display_name": "Data", + "display_name": "Tools", "dynamic": false, - "info": "The data to convert to JSON.", + "info": "These are the tools that the agent can use to help with tasks.", "input_types": [ - "Data" + "Tool" ], "list": true, - "name": "data", + "list_add_label": "Add More", + "name": "tools", "placeholder": "", "required": false, "show": true, "title_case": false, - "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, "type": "other", "value": "" }, - "pretty_print": { + "verbose": { "_input_type": "BoolInput", "advanced": true, - "display_name": "Pretty Print", + "display_name": "Verbose", "dynamic": false, - "info": "Format JSON with proper indentation", + "info": "", + "input_types": [], "list": false, - "name": "pretty_print", + "list_add_label": "Add More", + "name": "verbose", "placeholder": "", "required": false, "show": true, @@ -691,24 +1222,24 @@ "tool_mode": false }, "showNode": true, - "type": "JSONOutput" + "type": "Agent" }, "dragging": false, - "id": "JSONOutput-Ru3z5", + "id": "Agent-d22BR", "measured": { - "height": 196, + "height": 759, "width": 320 }, "position": { - "x": 2531.1428432781945, - "y": 344.0833210216154 + "x": 2145.090558990585, + "y": 336.3779756915187 }, - "selected": true, + "selected": false, "type": "genericNode" }, { "data": { - "id": "File Path-8jQBJ", + "id": "AutonomizeModel-PPDxo", "node": { "base_classes": [ "Data" @@ -716,19 +1247,19 @@ "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Load files from server URLs", - "display_name": "File Path", - "documentation": "http://docs.langflow.org/components/server_file", + "description": "Unified interface for Autonomize text-based AI models with dropdown selection", + "display_name": "Autonomize Model", + "documentation": "https://docs.example.com/autonomize-models", "edited": false, "field_order": [ - "file_urls", - "validate_urls", - "return_all_urls" + "selected_model", + "search_query" ], "frozen": false, - "icon": "File", + "icon": "Autonomize", + "last_updated": "2025-10-29T10:50:39.867Z", "legacy": false, - "lf_version": "1.1.1", + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], @@ -736,19 +1267,23 @@ { "allows_loop": false, "cache": true, - "display_name": "File Path", + "display_name": "Toolset", "group_outputs": false, - "method": "get_file_paths", - "name": "file_path", - "selected": "Data", + "hidden": null, + "method": "to_toolkit", + "name": "component_as_tool", + "options": null, + "required_inputs": null, + "selected": "Tool", "tool_mode": true, "types": [ - "Data" + "Tool" ], "value": "__UNDEFINED__" } ], "pinned": false, + "priority": 1, "template": { "_type": "Component", "code": { @@ -767,131 +1302,172 @@ "show": true, "title_case": false, "type": "code", - "value": "from __future__ import annotations\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, MultilineInput, Output\nfrom langflow.schema.data import Data\nfrom loguru import logger\n\n\nclass FileComponent(Component):\n display_name = \"File Path\"\n category: str = \"input_output\"\n description = \"Load files from server URLs\"\n documentation = \"http://docs.langflow.org/components/server_file\"\n icon = \"File\"\n name = \"File Path\"\n\n # Match the property name expected by FileComponent\n FILE_PATH_FIELD = \"file_path\"\n\n inputs = [\n MultilineInput(\n name=\"file_urls\",\n display_name=\"File URLs\",\n required=True,\n info=\"Enter one or more URLs (one per line) pointing to files on your server\",\n placeholder=\"https://example.com/file1.pdf\\nhttps://example.com/file2.pdf\",\n ),\n BoolInput(\n name=\"validate_urls\",\n display_name=\"Validate URLs\",\n info=\"If true, validates that URLs are accessible before returning them\",\n value=True,\n ),\n BoolInput(\n name=\"return_all_urls\",\n display_name=\"Return All URLs\",\n info=\"If true, returns all URLs even if some are invalid\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"file_path\", # Match the property name expected by FileComponent\n display_name=\"File Path\",\n method=\"get_file_paths\",\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._validated_urls: list[str] = []\n\n async def validate_url(self, url: str) -> bool:\n \"\"\"Validate that a URL is accessible.\"\"\"\n try:\n import aiohttp\n\n async with aiohttp.ClientSession() as session:\n async with session.head(url.strip()) as response:\n return response.status < 400\n except Exception as e:\n logger.error(f\"Error validating URL {url}: {e!s}\")\n return False\n\n async def get_file_paths(self) -> list[Data]:\n \"\"\"Get file paths for the FileComponent to process.\"\"\"\n try:\n if not self.file_urls:\n logger.warning(\"No URLs provided.\")\n return []\n\n # Split URLs by newlines and filter out empty lines\n urls = [url.strip() for url in self.file_urls.split(\"\\n\") if url.strip()]\n file_paths = []\n\n if self.validate_urls:\n # Validate all URLs concurrently\n import asyncio\n\n validation_tasks = [self.validate_url(url) for url in urls]\n validation_results = await asyncio.gather(*validation_tasks)\n\n # Pair URLs with their validation results\n valid_urls = [\n url\n for url, is_valid in zip(urls, validation_results, strict=False)\n if is_valid or self.return_all_urls\n ]\n\n if not valid_urls:\n logger.warning(\"No valid URLs found.\")\n return []\n\n self._validated_urls = valid_urls\n\n # Create Data objects for each valid URL\n for url in valid_urls:\n file_paths.append(Data(data={self.FILE_PATH_FIELD: url}))\n else:\n # If no validation required, create Data objects for all URLs\n file_paths = [Data(data={self.FILE_PATH_FIELD: url}) for url in urls]\n\n if file_paths:\n self.status = file_paths\n logger.info(f\"Generated {len(file_paths)} file paths\")\n for path in file_paths:\n logger.debug(f\"File path: {path.data.get(self.FILE_PATH_FIELD)}\")\n else:\n logger.warning(\"No file paths generated\")\n\n return file_paths\n\n except Exception as e:\n logger.error(f\"Error in get_file_paths: {e!s}\")\n return []\n\n def build(self) -> list[Data]:\n \"\"\"Build method to support both async and sync operation.\"\"\"\n import asyncio\n\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n return loop.run_until_complete(self.get_file_paths())\n finally:\n loop.close()\n" + "value": "\"\"\"Autonomize Model Component - Unified text-based model component with dropdown selection.\"\"\"\n\nimport ast\nimport json\nfrom typing import Any\n\nfrom langflow.services.modelhub.model_endpoint import ModelEndpoint\nfrom loguru import logger\n\nfrom langflow.base.modelhub import ATModelComponent\nfrom langflow.inputs.inputs import FieldTypes\nfrom langflow.io import DropdownInput, MultilineInput, Output\nfrom langflow.schema.data import Data\n\n\nclass AutonomizeModelComponent(ATModelComponent):\n \"\"\"Unified component for Autonomize text-based models with dropdown selection.\"\"\"\n\n display_name: str = \"Autonomize Model\"\n description: str = \"Unified interface for Autonomize text-based AI models with dropdown selection\"\n documentation: str = \"https://docs.example.com/autonomize-models\"\n icon: str = \"Autonomize\"\n name: str = \"AutonomizeModel\"\n category: str = \"models\"\n priority: int = 1 # High priority to appear near top\n\n # Model mapping for dropdown options\n MODEL_OPTIONS = {\n \"Clinical LLM\": ModelEndpoint.CLINICAL_LLM,\n \"Clinical Note Classifier\": ModelEndpoint.CLINICAL_NOTE_CLASSIFIER,\n \"Combined Entity Linking\": ModelEndpoint.COMBINED_ENTITY_LINKING,\n \"CPT Code\": ModelEndpoint.CPT_CODE,\n \"ICD-10 Code\": ModelEndpoint.ICD_10,\n \"RxNorm Code\": ModelEndpoint.RXNORM,\n \"Short Summary\": ModelEndpoint.SHORT_SUMMARY,\n \"Detailed Summary\": ModelEndpoint.DETAILED_SUMMARY,\n \"Page Level Classifier\": ModelEndpoint.PAGE_LEVEL_CLASSIFIER\n }\n\n # Model descriptions for UI\n MODEL_DESCRIPTIONS = {\n \"Clinical LLM\": \"Extract clinical entities from medical text\",\n \"Clinical Note Classifier\": \"Classify clinical notes by type\",\n \"Combined Entity Linking\": \"Link extracted entities to standard vocabularies\",\n \"CPT Code\": \"Extract CPT codes from medical text\",\n \"ICD-10 Code\": \"Extract ICD-10 codes from medical text\",\n \"RxNorm Code\": \"Extract RxNorm codes for medications\",\n \"Short Summary\": \"short summary\",\n \"Detailed Summary\": \"detailed summary\",\n \"Page Level Classifier\": \"page type classification\"\n }\n\n inputs = [\n DropdownInput(\n name=\"selected_model\",\n display_name=\"Model\",\n options=list(MODEL_OPTIONS.keys()),\n value=next(iter(MODEL_OPTIONS.keys())),\n info=\"Select the Autonomize model to use\",\n real_time_refresh=True,\n ),\n MultilineInput(\n name=\"search_query\",\n display_name=\"Text Input\",\n field_type=FieldTypes.TEXT,\n multiline=True,\n tool_mode=True,\n info=\"Input text to process with the selected model\",\n ),\n ]\n\n outputs = [\n Output(\n name=\"prediction\",\n display_name=\"Model Output\",\n method=\"build_output\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._current_model_endpoint = None\n # Initialize _model_name with the default model endpoint (required by ATModelComponent)\n self._model_name = self.MODEL_OPTIONS[next(iter(self.MODEL_OPTIONS.keys()))]\n\n @property\n def model_endpoint(self) -> ModelEndpoint:\n \"\"\"Get the current model endpoint based on selection.\"\"\"\n return self.MODEL_OPTIONS[self.selected_model]\n\n @property\n def model_name_from_endpoint(self) -> str:\n \"\"\"Get the model name from the ModelEndpoint.\"\"\"\n return self.model_endpoint.get_model()\n\n async def extract_entities(self, text: Any) -> dict:\n \"\"\"Extract entities using the selected model.\"\"\"\n # Handle different input formats\n if isinstance(text, str) and text.strip().startswith(\"{\"):\n try:\n text_dict = json.loads(text)\n text = text_dict\n except json.JSONDecodeError as e:\n logger.error(f\"Failed to parse JSON string: {e}\")\n # If JSON parsing fails, use the original text\n\n # Handle the case where input is a dictionary with result structure\n if isinstance(text, dict) and \"result\" in text:\n result = text[\"result\"]\n if isinstance(result, list) and len(result) > 0:\n # Extract text from the first result item\n first_result = result[0]\n if isinstance(first_result, dict) and \"text\" in first_result:\n extracted_text = first_result[\"text\"]\n text = extracted_text\n else:\n msg = \"First result item does not contain 'text' key\"\n raise ValueError(msg)\n else:\n msg = \"Result list is empty or not a list\"\n raise ValueError(msg)\n elif isinstance(text, dict) and \"text\" in text:\n text = text[\"text\"]\n elif hasattr(text, \"text\"):\n text = text.text\n\n try:\n # Use the standard predict method from ATModelComponent\n # Set the _model_name based on current selection\n self._model_name = self.model_endpoint\n\n response = await self.predict(text=text)\n\n # Handle string responses\n if isinstance(response, str):\n try:\n response = ast.literal_eval(response)\n except (ValueError, SyntaxError):\n # If it's not a valid Python literal, try JSON\n try:\n response = json.loads(response)\n except json.JSONDecodeError:\n # If neither works, wrap in a dict\n response = {\"result\": response}\n else:\n return response\n except Exception as e:\n msg = f\"Error processing with {self.model_name}: {e!s}\"\n logger.error(f\"API call failed: {msg}\")\n raise ValueError(msg) from e\n\n async def build_output(self) -> Data:\n \"\"\"Generate the output based on selected model.\"\"\"\n query_results = await self.extract_entities(self.search_query)\n\n # Create standardized output format\n output_data = {\n \"model\": self.selected_model,\n \"model_description\": self.MODEL_DESCRIPTIONS.get(self.selected_model, \"\"),\n \"data\": query_results\n }\n\n data = Data(value=output_data)\n self.status = f\"Processed with {self.selected_model}\"\n return data\n\n def build(self):\n \"\"\"Return the main build function for Langflow framework.\"\"\"\n return self.build_output\n" }, - "file_urls": { + "search_query": { "_input_type": "MultilineInput", "advanced": false, - "display_name": "File URLs", + "copy_field": false, + "display_name": "Text Input", "dynamic": false, - "info": "Enter one or more URLs (one per line) pointing to files on your server", + "info": "Input text to process with the selected model", "input_types": [ "Message" ], "list": false, + "list_add_label": "Add More", "load_from_db": false, "multiline": true, - "name": "file_urls", - "placeholder": "https://example.com/file1.pdf\nhttps://example.com/file2.pdf", - "required": true, + "name": "search_query", + "placeholder": "", + "required": false, "show": true, "title_case": false, - "tool_mode": false, + "tool_mode": true, "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "https://autonomizestorageaccount.blob.core.windows.net/genesis-platform-dev/PriorAuth000_2.pdf?sv=2025-01-05&st=2025-01-23T14%3A11%3A13Z&se=2025-01-24T14%3A11%3A13Z&skoid=77f6025b-720c-4af4-a874-9c1e1054680b&sktid=2a9d6d51-7674-4d37-8d71-1ee2fe30ccf4&skt=2025-01-23T14%3A11%3A13Z&ske=2025-01-24T14%3A11%3A13Z&sks=b&skv=2025-01-05&sr=b&sp=r&sig=n4Cqed%2BA%2F%2FgO4P8vaDEki87P0YbrebDe%2F5zDPjKmPcE%3D" + "value": "" }, - "return_all_urls": { - "_input_type": "BoolInput", + "selected_model": { + "_input_type": "DropdownInput", "advanced": false, - "display_name": "Return All URLs", + "combobox": false, + "dialog_inputs": {}, + "display_name": "Model", "dynamic": false, - "info": "If true, returns all URLs even if some are invalid", - "list": false, - "name": "return_all_urls", + "external_options": {}, + "info": "Select the Autonomize model to use", + "name": "selected_model", + "options": [ + "Clinical LLM", + "Clinical Note Classifier", + "Combined Entity Linking", + "CPT Code", + "ICD-10 Code", + "RxNorm Code", + "Short Summary", + "Detailed Summary", + "Page Level Classifier" + ], + "options_metadata": [], "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "str", + "value": "Clinical LLM" }, - "validate_urls": { - "_input_type": "BoolInput", + "tools_metadata": { + "_input_type": "ToolsInput", "advanced": false, - "display_name": "Validate URLs", + "display_name": "Actions", "dynamic": false, - "info": "If true, validates that URLs are accessible before returning them", - "list": false, - "name": "validate_urls", + "info": "Modify tool names and descriptions to help agents understand when to use each tool.", + "is_list": true, + "list_add_label": "Add More", + "name": "tools_metadata", "placeholder": "", + "real_time_refresh": true, "required": false, "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "tools", + "value": [ + { + "args": { + "search_query": { + "default": "", + "description": "Input text to process with the selected model", + "title": "Search Query", + "type": "string" + } + }, + "description": "used to extract clinical entities from the text", + "display_description": "Unified interface for Autonomize text-based AI models with dropdown selection", + "display_name": "build_output", + "name": "clinical_entity_extractor", + "readonly": false, + "status": true, + "tags": [ + "build_output" + ] + } + ] } }, - "tool_mode": false + "tool_mode": true }, "showNode": true, - "type": "File Path" + "type": "AutonomizeModel" }, - "dragging": false, - "id": "File Path-8jQBJ", + "id": "AutonomizeModel-PPDxo", "measured": { - "height": 327, + "height": 300, "width": 320 }, "position": { - "x": 244.28455112136976, - "y": 423.84335174163766 + "x": 1431.9314787549552, + "y": 215 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "azure_ocr-KoHV8", + "id": "ChatOutput-jYmRf", "node": { "base_classes": [ - "Data" + "Message" ], "beta": false, + "category": "input_output", "conditional_paths": [], "custom_fields": {}, - "description": "Process documents using Azure Form Recognizer OCR capabilities", - "display_name": "OCR", - "documentation": "", + "description": "Display a chat message in the Playground.", + "display_name": "Chat Output", + "documentation": "https://docs.langflow.org/components-io#chat-output", "edited": false, "field_order": [ - "file_path", - "silent_errors", - "delete_server_file_after_processing", - "ignore_unsupported_extensions", - "ignore_unspecified_files", - "model_type", - "extract_tables", - "include_confidence", - "use_multithreading", - "concurrency_multithreading" + "input_value", + "should_store_message", + "sender", + "sender_name", + "session_id", + "data_template" ], "frozen": false, - "icon": "Azure", + "icon": "MessagesSquare", + "key": "ChatOutput", "legacy": false, - "lf_version": "1.1.1", + "lf_version": "1.6.3", "metadata": {}, - "minimized": false, + "minimized": true, "output_types": [], "outputs": [ { + "allows_loop": false, "cache": true, - "display_name": "Structured Data", - "method": "load_files", - "name": "structured_data", - "selected": "Data", + "display_name": "Output Message", + "group_outputs": false, + "method": "message_response", + "name": "message", + "selected": "Message", + "tool_mode": true, "types": [ - "Data" + "Message" ], "value": "__UNDEFINED__" } ], "pinned": false, + "score": 0.003169567463043492, "template": { "_type": "Component", "code": { @@ -910,177 +1486,133 @@ "show": true, "title_case": false, "type": "code", - "value": "import asyncio\nimport concurrent.futures\nimport mimetypes\nimport os\nimport tempfile\nfrom pathlib import Path\nfrom urllib.parse import unquote, urlparse\n\nimport aiohttp\nimport requests\nfrom loguru import logger\n\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.io import BoolInput, DropdownInput, IntInput, Output\nfrom langflow.schema import Data\nfrom langflow.services.deps import (\n get_azure_ocr_service,\n) # You'll need to create this\n\n\nclass AzureOCRComponent(BaseFileComponent):\n \"\"\"Component for OCR processing using Azure Form Recognizer.\"\"\"\n\n display_name = \"Form Recognizer\"\n description = \"Process documents using Azure Form Recognizer OCR capabilities\"\n icon = \"Azure\"\n name = \"azure_ocr\"\n\n VALID_EXTENSIONS = [\"pdf\", \"jpg\", \"jpeg\", \"png\", \"bmp\", \"tiff\", \"tif\"]\n\n inputs = [\n # Include only the HandleInput and BoolInputs from base_inputs\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"file_path\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"silent_errors\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"delete_server_file_after_processing\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unsupported_extensions\"\n ),\n next(\n input\n for input in BaseFileComponent._base_inputs\n if input.name == \"ignore_unspecified_files\"\n ),\n DropdownInput(\n name=\"model_type\",\n display_name=\"Model Type\",\n options=[\"prebuilt-document\", \"prebuilt-read\", \"prebuilt-layout\"],\n value=\"prebuilt-document\",\n info=\"Choose the Form Recognizer model to use\",\n ),\n BoolInput(\n name=\"extract_tables\",\n display_name=\"Extract Tables\",\n value=True,\n info=\"Extract and format tables from the document\",\n ),\n BoolInput(\n name=\"include_confidence\",\n display_name=\"Include Confidence Scores\",\n value=False,\n advanced=True,\n info=\"Include confidence scores in the extracted text\",\n ),\n BoolInput(\n name=\"use_multithreading\",\n display_name=\"Use Concurrent Processing\",\n value=True,\n info=\"Enable concurrent processing of multiple files\",\n ),\n IntInput(\n name=\"concurrency_multithreading\",\n display_name=\"Processing Concurrency\",\n advanced=True,\n info=\"Number of files to process concurrently\",\n value=2,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Structured Data\", name=\"structured_data\", method=\"load_files\"\n ),\n ]\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self.temp_dir = tempfile.mkdtemp()\n self._downloaded_files = {}\n\n def get_text_content(self) -> str:\n \"\"\"Return the concatenated text content from all processed pages.\"\"\"\n return self._text_content\n\n def _extract_filename_from_url(self, url: str) -> str:\n \"\"\"Extract filename from URL or generate a default one.\"\"\"\n try:\n parsed_url = urlparse(url)\n path = unquote(parsed_url.path)\n filename = os.path.basename(path)\n\n if filename and \".\" in filename:\n return filename\n\n response = requests.head(url, allow_redirects=True)\n if \"content-disposition\" in response.headers:\n content_disp = response.headers[\"content-disposition\"]\n if \"filename=\" in content_disp:\n return content_disp.split(\"filename=\")[1].strip(\"\\\"'\")\n\n if \"content-type\" in response.headers:\n ext = mimetypes.guess_extension(response.headers[\"content-type\"])\n if ext:\n return f\"downloaded{ext}\"\n\n return \"downloaded.pdf\"\n except Exception as e:\n logger.error(f\"Error extracting filename from URL: {e!s}\")\n return \"downloaded.pdf\"\n\n async def _download_file_from_url(self, url: str) -> str | None:\n \"\"\"Download a file from a URL.\"\"\"\n try:\n filename = self._extract_filename_from_url(url)\n local_path = os.path.join(self.temp_dir, filename)\n\n async with aiohttp.ClientSession() as session:\n async with session.get(url) as response:\n response.raise_for_status()\n with open(local_path, \"wb\") as f:\n while True:\n chunk = await response.content.read(8192)\n if not chunk:\n break\n f.write(chunk)\n\n self._downloaded_files[url] = local_path\n logger.info(f\"Successfully downloaded file to {local_path}\")\n return local_path\n\n except Exception as e:\n logger.error(f\"Error downloading file from URL: {e!s}\")\n if not self.silent_errors:\n raise\n return None\n\n def _validate_and_resolve_paths(self) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Handle URLs and local paths.\"\"\"\n resolved_files = []\n file_path = self._file_path_as_list()\n\n for obj in file_path:\n server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)\n\n if not server_file_path:\n if not self.ignore_unspecified_files:\n msg = f\"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property.\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n try:\n # Check if it's a URL\n if isinstance(server_file_path, str) and server_file_path.startswith(\n (\"http://\", \"https://\")\n ):\n # Create event loop for async download\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n local_path = loop.run_until_complete(\n self._download_file_from_url(server_file_path)\n )\n finally:\n loop.close()\n\n if not local_path:\n continue\n\n # Create a new Data object with both the original URL and local path\n new_data = Data(\n data={\n self.SERVER_FILE_PATH_FIELDNAME: local_path,\n \"original_url\": server_file_path,\n }\n )\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n new_data,\n Path(local_path),\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n else:\n # Handle local files\n resolved_path = Path(self.resolve_path(str(server_file_path)))\n if not resolved_path.exists():\n msg = f\"File not found: {server_file_path}\"\n if not self.silent_errors:\n raise ValueError(msg)\n continue\n\n resolved_files.append(\n BaseFileComponent.BaseFile(\n obj,\n resolved_path,\n delete_after_processing=self.delete_server_file_after_processing,\n )\n )\n\n except Exception as e:\n logger.error(f\"Error processing path {server_file_path}: {e!s}\")\n if not self.silent_errors:\n raise\n continue\n\n return resolved_files\n\n async def process_file(\n self, file_path: str, *, silent_errors: bool = False\n ) -> tuple[Data, str]:\n \"\"\"Process a single file using the OCR service.\"\"\"\n try:\n service = get_azure_ocr_service()\n\n with open(file_path, \"rb\") as file:\n file_content = file.read()\n\n extracted_content, plain_text = await service.process_document(\n file_content=file_content,\n model_type=self.model_type,\n include_confidence=self.include_confidence,\n extract_tables=self.extract_tables,\n )\n\n structured_data = Data(\n text=plain_text,\n data={\n self.SERVER_FILE_PATH_FIELDNAME: str(file_path),\n \"result\": extracted_content,\n },\n )\n\n return structured_data, plain_text\n\n except Exception as e:\n logger.error(f\"Error processing file {file_path}: {e!s}\")\n if not silent_errors:\n raise\n return None, \"\"\n\n def process_files(\n self, file_list: list[BaseFileComponent.BaseFile]\n ) -> list[BaseFileComponent.BaseFile]:\n \"\"\"Process multiple files with concurrent processing.\"\"\"\n if not file_list:\n msg = \"No files to process.\"\n raise ValueError(msg)\n\n concurrency = (\n 1\n if not self.use_multithreading\n else max(1, self.concurrency_multithreading)\n )\n file_count = len(file_list)\n\n logger.info(f\"Processing {file_count} files with concurrency: {concurrency}\")\n\n all_plain_text = []\n processed_data = []\n\n if concurrency > 1 and file_count > 1:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n with concurrent.futures.ThreadPoolExecutor(\n max_workers=concurrency\n ) as executor:\n future_to_file = {\n executor.submit(\n lambda path: loop.run_until_complete(\n self.process_file(\n str(path), silent_errors=self.silent_errors\n )\n ),\n file.path,\n ): file\n for file in file_list\n }\n for future in concurrent.futures.as_completed(future_to_file):\n try:\n structured_data, plain_text = future.result()\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error in concurrent processing: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n else:\n loop = asyncio.new_event_loop()\n asyncio.set_event_loop(loop)\n try:\n for file in file_list:\n try:\n structured_data, plain_text = loop.run_until_complete(\n self.process_file(\n str(file.path), silent_errors=self.silent_errors\n )\n )\n processed_data.append(structured_data)\n all_plain_text.append(plain_text)\n except Exception as e:\n logger.error(f\"Error processing file {file.path}: {e!s}\")\n if not self.silent_errors:\n raise\n processed_data.append(None)\n all_plain_text.append(\"\")\n finally:\n loop.close()\n\n # Store concatenated text content\n self._text_content = \"\\n\\n=== NEW DOCUMENT ===\\n\\n\".join(all_plain_text)\n\n return self.rollup_data(file_list, processed_data)\n\n def __del__(self):\n \"\"\"Cleanup temporary files and directory.\"\"\"\n try:\n if hasattr(self, \"temp_dir\") and os.path.exists(self.temp_dir):\n # Remove downloaded files\n for file_path in self._downloaded_files.values():\n if os.path.exists(file_path):\n os.unlink(file_path)\n # Remove the temporary directory\n os.rmdir(self.temp_dir)\n except Exception as e:\n logger.error(f\"Error cleaning up temporary files: {e!s}\")\n" - }, - "concurrency_multithreading": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Processing Concurrency", - "dynamic": false, - "info": "Number of files to process concurrently", - "list": false, - "name": "concurrency_multithreading", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 2 + "value": "from collections.abc import Generator\nfrom typing import Any\n\nimport orjson\nfrom fastapi.encoders import jsonable_encoder\n\nfrom langflow.base.io.chat import ChatComponent\nfrom langflow.helpers.data import safe_convert\nfrom langflow.inputs.inputs import BoolInput, DropdownInput, HandleInput, MessageTextInput\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.schema.message import Message\nfrom langflow.schema.properties import Source\nfrom langflow.template.field.base import Output\nfrom langflow.utils.constants import (\n MESSAGE_SENDER_AI,\n MESSAGE_SENDER_NAME_AI,\n MESSAGE_SENDER_USER,\n)\n\n\nclass ChatOutput(ChatComponent):\n display_name = \"Chat Output\"\n description = \"Display a chat message in the Playground.\"\n documentation: str = \"https://docs.langflow.org/components-io#chat-output\"\n icon = \"MessagesSquare\"\n name = \"ChatOutput\"\n minimized = True\n\n inputs = [\n HandleInput(\n name=\"input_value\",\n display_name=\"Inputs\",\n info=\"Message to be passed as output.\",\n input_types=[\"Data\", \"DataFrame\", \"Message\"],\n required=True,\n ),\n BoolInput(\n name=\"should_store_message\",\n display_name=\"Store Messages\",\n info=\"Store the message in the history.\",\n value=True,\n advanced=True,\n ),\n DropdownInput(\n name=\"sender\",\n display_name=\"Sender Type\",\n options=[MESSAGE_SENDER_AI, MESSAGE_SENDER_USER],\n value=MESSAGE_SENDER_AI,\n advanced=True,\n info=\"Type of sender.\",\n ),\n MessageTextInput(\n name=\"sender_name\",\n display_name=\"Sender Name\",\n info=\"Name of the sender.\",\n value=MESSAGE_SENDER_NAME_AI,\n advanced=True,\n ),\n MessageTextInput(\n name=\"session_id\",\n display_name=\"Session ID\",\n info=\"The session ID of the chat. If empty, the current session ID parameter will be used.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"data_template\",\n display_name=\"Data Template\",\n value=\"{text}\",\n advanced=True,\n info=\"Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.\",\n ),\n ]\n outputs = [\n Output(\n display_name=\"Output Message\",\n name=\"message\",\n method=\"message_response\",\n ),\n ]\n\n def _build_source(self, id_: str | None, display_name: str | None, source: str | None) -> Source:\n source_dict = {}\n if id_:\n source_dict[\"id\"] = id_\n if display_name:\n source_dict[\"display_name\"] = display_name\n if source:\n # Handle case where source is a ChatOpenAI object\n if hasattr(source, \"model_name\"):\n source_dict[\"source\"] = source.model_name\n elif hasattr(source, \"model\"):\n source_dict[\"source\"] = str(source.model)\n else:\n source_dict[\"source\"] = str(source)\n return Source(**source_dict)\n\n async def message_response(self) -> Message:\n # First convert the input to string if needed\n text = self.convert_to_string()\n\n # Get source properties\n source, icon, display_name, source_id = self.get_properties_from_source_component()\n\n # Create or use existing Message object\n if isinstance(self.input_value, Message):\n message = self.input_value\n # Update message properties\n message.text = text\n else:\n message = Message(text=text)\n\n # Set message properties\n message.sender = self.sender\n message.sender_name = self.sender_name\n message.session_id = self.session_id\n message.flow_id = self.graph.flow_id if hasattr(self, \"graph\") else None\n message.properties.source = self._build_source(source_id, display_name, source)\n\n # Store message if needed\n if self.session_id and self.should_store_message:\n stored_message = await self.send_message(message)\n self.message.value = stored_message\n message = stored_message\n\n self.status = message\n return message\n\n def _serialize_data(self, data: Data) -> str:\n \"\"\"Serialize Data object to JSON string.\"\"\"\n # Convert data.data to JSON-serializable format\n serializable_data = jsonable_encoder(data.data)\n # Serialize with orjson, enabling pretty printing with indentation\n json_bytes = orjson.dumps(serializable_data, option=orjson.OPT_INDENT_2)\n # Convert bytes to string and wrap in Markdown code blocks\n return \"```json\\n\" + json_bytes.decode(\"utf-8\") + \"\\n```\"\n\n def _validate_input(self) -> None:\n \"\"\"Validate the input data and raise ValueError if invalid.\"\"\"\n if self.input_value is None:\n msg = \"Input data cannot be None\"\n raise ValueError(msg)\n if isinstance(self.input_value, list) and not all(\n isinstance(item, Message | Data | DataFrame | str) for item in self.input_value\n ):\n invalid_types = [\n type(item).__name__\n for item in self.input_value\n if not isinstance(item, Message | Data | DataFrame | str)\n ]\n msg = f\"Expected Data or DataFrame or Message or str, got {invalid_types}\"\n raise TypeError(msg)\n if not isinstance(\n self.input_value,\n Message | Data | DataFrame | str | list | Generator | type(None),\n ):\n type_name = type(self.input_value).__name__\n msg = f\"Expected Data or DataFrame or Message or str, Generator or None, got {type_name}\"\n raise TypeError(msg)\n\n def convert_to_string(self) -> str | Generator[Any, None, None]:\n \"\"\"Convert input data to string with proper error handling.\"\"\"\n self._validate_input()\n if isinstance(self.input_value, list):\n clean_data: bool = getattr(self, \"clean_data\", False)\n return \"\\n\".join([safe_convert(item, clean_data=clean_data) for item in self.input_value])\n if isinstance(self.input_value, Generator):\n return self.input_value\n return safe_convert(self.input_value)\n" }, - "delete_server_file_after_processing": { - "_input_type": "BoolInput", + "data_template": { + "_input_type": "MessageTextInput", "advanced": true, - "display_name": "Delete Server File After Processing", - "dynamic": false, - "info": "If true, the Server File Path will be deleted after processing.", - "list": false, - "name": "delete_server_file_after_processing", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "extract_tables": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Extract Tables", + "display_name": "Data Template", "dynamic": false, - "info": "Extract and format tables from the document", + "info": "Template to convert Data to Text. If left empty, it will be dynamically set to the Data's text key.", + "input_types": [ + "Message" + ], "list": false, - "name": "extract_tables", + "list_add_label": "Add More", + "load_from_db": false, + "name": "data_template", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "str", + "value": "{text}" }, - "file_path": { + "input_value": { "_input_type": "HandleInput", "advanced": false, - "display_name": "URL", + "display_name": "Inputs", "dynamic": false, - "info": "Upload file via URL or local server path. Supports: \n1. Direct HTTP/HTTPS URLs for remote files\n2. Local server file paths\n3. Data objects with file path property\n4. Message objects containing file paths\n\nSupports the same file types as the Path input. Takes precedence over Path input when both are provided.", + "info": "Message to be passed as output.", "input_types": [ "Data", + "DataFrame", "Message" ], - "list": true, - "name": "file_path", + "list": false, + "list_add_label": "Add More", + "name": "input_value", "placeholder": "", - "required": false, + "required": true, "show": true, "title_case": false, "trace_as_metadata": true, "type": "other", "value": "" }, - "ignore_unspecified_files": { - "_input_type": "BoolInput", + "sender": { + "_input_type": "DropdownInput", "advanced": true, - "display_name": "Ignore Unspecified Files", + "combobox": false, + "dialog_inputs": {}, + "display_name": "Sender Type", "dynamic": false, - "info": "If true, Data with no 'file_path' property will be ignored.", - "list": false, - "name": "ignore_unspecified_files", + "external_options": {}, + "info": "Type of sender.", + "name": "sender", + "options": [ + "Machine", + "User" + ], + "options_metadata": [], "placeholder": "", "required": false, "show": true, "title_case": false, + "toggle": false, "tool_mode": false, "trace_as_metadata": true, - "type": "bool", - "value": false + "type": "str", + "value": "Machine" }, - "ignore_unsupported_extensions": { - "_input_type": "BoolInput", + "sender_name": { + "_input_type": "MessageTextInput", "advanced": true, - "display_name": "Ignore Unsupported Extensions", + "display_name": "Sender Name", "dynamic": false, - "info": "If true, files with unsupported extensions will not be processed.", + "info": "Name of the sender.", + "input_types": [ + "Message" + ], "list": false, - "name": "ignore_unsupported_extensions", + "list_add_label": "Add More", + "load_from_db": false, + "name": "sender_name", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, - "type": "bool", - "value": true + "type": "str", + "value": "AI" }, - "include_confidence": { - "_input_type": "BoolInput", + "session_id": { + "_input_type": "MessageTextInput", "advanced": true, - "display_name": "Include Confidence Scores", - "dynamic": false, - "info": "Include confidence scores in the extracted text", - "list": false, - "name": "include_confidence", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "model_type": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "display_name": "Model Type", + "display_name": "Session ID", "dynamic": false, - "info": "Choose the Form Recognizer model to use", - "name": "model_type", - "options": [ - "prebuilt-document", - "prebuilt-read", - "prebuilt-layout" + "info": "The session ID of the chat. If empty, the current session ID parameter will be used.", + "input_types": [ + "Message" ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "session_id", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, + "trace_as_input": true, "trace_as_metadata": true, "type": "str", - "value": "prebuilt-document" + "value": "" }, - "silent_errors": { + "should_store_message": { "_input_type": "BoolInput", "advanced": true, - "display_name": "Silent Errors", - "dynamic": false, - "info": "If true, errors will not raise an exception.", - "list": false, - "name": "silent_errors", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "use_multithreading": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Use Concurrent Processing", + "display_name": "Store Messages", "dynamic": false, - "info": "Enable concurrent processing of multiple files", + "info": "Store the message in the history.", "list": false, - "name": "use_multithreading", + "list_add_label": "Add More", + "name": "should_store_message", "placeholder": "", "required": false, "show": true, @@ -1093,53 +1625,63 @@ }, "tool_mode": false }, - "showNode": true, - "type": "azure_ocr" + "showNode": false, + "type": "ChatOutput" }, - "dragging": false, - "id": "azure_ocr-KoHV8", + "id": "ChatOutput-jYmRf", "measured": { - "height": 395, - "width": 320 + "height": 48, + "width": 192 }, "position": { - "x": 572.6399112675325, - "y": -2.862058198044508 + "x": 2545.712841397065, + "y": 576.3692648499549 }, "selected": false, "type": "genericNode" }, { "data": { - "id": "RelationExtraction-TvyqO", + "id": "BlobStorage-Yg2WN", "node": { "base_classes": [ "Data" ], "beta": false, + "category": "input_output", "conditional_paths": [], "custom_fields": {}, - "description": "Identifies and extracts relevant lab results from medical records.", - "display_name": "Relation Extraction", - "documentation": "", + "description": "Load files from Azure Blob Storage", + "display_name": "Blob Storage", + "documentation": "http://docs.langflow.org/components/storage", "edited": false, "field_order": [ - "data" + "storage_account", + "container_name", + "file_name", + "return_all_files" ], "frozen": false, "icon": "Autonomize", + "key": "BlobStorage", + "last_updated": "2025-10-28T20:15:03.077Z", "legacy": false, - "lf_version": "1.1.1", + "lf_version": "1.6.3", "metadata": {}, "minimized": false, "output_types": [], "outputs": [ { + "allows_loop": false, "cache": true, - "display_name": "Data List", - "method": "parse_data_as_list", - "name": "data_list", + "display_name": "File Path", + "group_outputs": false, + "method": "get_file_paths", + "name": "file_path", + "options": null, + "required_inputs": null, "selected": "Data", + "tool_mode": true, "types": [ "Data" ], @@ -1147,6 +1689,7 @@ } ], "pinned": false, + "score": 0.2945640631554785, "template": { "_type": "Component", "code": { @@ -1165,61 +1708,325 @@ "show": true, "title_case": false, "type": "code", - "value": "from langflow.custom import Component\nfrom langflow.helpers.data import data_to_text_list\nfrom langflow.io import DataInput, Output\nfrom langflow.schema import Data\n\n\nclass RelationExtraction(Component):\n display_name = \"Relation Extraction\"\n description = \"Identifies and extracts relevant lab results from medical records.\"\n icon = \"Autonomize\"\n name = \"RelationExtraction\"\n\n inputs = [\n DataInput(\n name=\"data\",\n display_name=\"Data\",\n info=\"The data to convert to text.\",\n is_list=True,\n ),\n ]\n\n outputs = [\n Output(\n display_name=\"Data List\",\n name=\"data_list\",\n info=\"Data as a list of new Data, each having `text` formatted by Template\",\n method=\"parse_data_as_list\",\n ),\n ]\n\n def _clean_args(self) -> tuple[list[Data], str, str]:\n data = self.data if isinstance(self.data, list) else [self.data]\n template = \"{value}\"\n return data\n\n def parse_data_as_list(self) -> Data:\n data = self._clean_args()\n text_list, data_list = data_to_text_list(\"{value}\", data)\n for item, text in zip(data_list, text_list, strict=True):\n item.set_text(text)\n extracted_values = self.extract_relations(data_list)\n result = Data(value={\"data\": extracted_values})\n return result\n\n def extract_relations(self, data) -> list[Data]:\n relations = []\n for item in data:\n for prediction in item.data[\"value\"][\"data\"].prediction:\n attributes = prediction.Attributes\n if attributes is not None and len(attributes) > 0:\n relations.append(prediction)\n return relations\n" + "value": "\"\"\"Blob Storage Component for loading files from Azure Blob Storage.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom langflow.custom import Component\nfrom langflow.io import BoolInput, DropdownInput, Output, StrInput\nfrom langflow.schema.data import Data\nfrom langflow.services.deps import get_flexstore_service\nfrom loguru import logger\n\n\nclass BlobStorageComponent(Component):\n display_name = \"Blob Storage\"\n category: str = \"input_output\"\n description = \"Load files from Azure Blob Storage\"\n documentation = \"http://docs.langflow.org/components/storage\"\n icon = \"Autonomize\"\n name = \"BlobStorage\"\n\n # Match the property name expected by FileComponent\n FILE_PATH_FIELD = \"file_path\"\n\n def __init__(self, **kwargs):\n super().__init__(**kwargs)\n self._container_list: list[str] = []\n self._file_list: list[str] = []\n\n inputs = [\n StrInput(\n name=\"storage_account\",\n display_name=\"Storage Account\",\n required=False,\n info=\"Storage Account name\",\n advanced=True,\n ),\n DropdownInput(\n name=\"container_name\",\n display_name=\"Container\",\n info=\"Select a container from the storage account\",\n required=True,\n refresh_button=True,\n ),\n DropdownInput(\n name=\"file_name\",\n display_name=\"File\",\n info=\"Select a file from the container\",\n required=True,\n refresh_button=True,\n ),\n BoolInput(\n name=\"return_all_files\",\n display_name=\"Return All Files\",\n info=\"If true and no specific file is selected, returns all files in the container\",\n value=True,\n ),\n ]\n\n outputs = [\n Output(\n name=\"file_path\", # Match the property name expected by FileComponent\n display_name=\"File Path\",\n method=\"get_file_paths\",\n ),\n ]\n\n async def update_build_config(\n self, build_config: dict, field_value: Any, field_name: str | None = None\n ):\n \"\"\"Update the build configuration based on field changes.\"\"\"\n logger.info(f\"update_build_config called with field_name: {field_name}\")\n\n storage_account = getattr(self, \"storage_account\", None)\n container_name = getattr(self, \"container_name\", None)\n\n if field_name == \"container_name\":\n try:\n # Load the container options when the field is refreshed\n service = get_flexstore_service()\n self._container_list = await service.get_containers(storage_account)\n\n build_config[\"container_name\"][\"options\"] = self._container_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating container list: {e!s}\")\n raise\n\n elif field_name == \"file_name\" and container_name:\n try:\n # Load the file options when the field is refreshed\n service = get_flexstore_service()\n self._file_list = await service.get_files(\n storage_account, container_name\n )\n\n build_config[\"file_name\"][\"options\"] = self._file_list\n return build_config\n\n except Exception as e:\n logger.exception(f\"Error updating file list: {e!s}\")\n raise\n\n return build_config\n\n async def get_file_paths(self) -> list[Data]:\n \"\"\"Get file paths for the FileComponent to process.\"\"\"\n try:\n if not self.container_name:\n logger.warning(\"Container name is required.\")\n return []\n\n service = get_flexstore_service()\n file_paths = []\n\n # If a specific file is selected\n if self.file_name:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, self.file_name\n )\n if signed_url:\n file_paths = [Data(data={self.FILE_PATH_FIELD: signed_url})]\n # If no specific file is selected and return_all_files is True\n elif self.return_all_files:\n files = await service.get_files(\n self.storage_account, self.container_name\n )\n for file in files:\n signed_url = await service.get_signed_url(\n self.storage_account, self.container_name, file\n )\n if signed_url:\n file_paths.append(Data(data={self.FILE_PATH_FIELD: signed_url}))\n\n if file_paths:\n self.status = file_paths\n logger.info(f\"Generated {len(file_paths)} file paths\")\n for path in file_paths:\n logger.debug(f\"File path: {path.data.get(self.FILE_PATH_FIELD)}\")\n else:\n logger.warning(\"No file paths generated\")\n\n return file_paths\n\n except Exception as e:\n logger.error(f\"Error in get_file_paths: {e!s}\")\n return []\n" }, - "data": { - "_input_type": "DataInput", + "container_name": { + "_input_type": "DropdownInput", "advanced": false, - "display_name": "Data", + "combobox": false, + "dialog_inputs": {}, + "display_name": "Container", "dynamic": false, - "info": "The data to convert to text.", - "input_types": [ - "Data" + "external_options": {}, + "info": "Select a container from the storage account", + "name": "container_name", + "options": [ + "a-and-g-case-summary", + "a-and-g-case-summary-for-client", + "a-and-g-input", + "a-and-g-ocr-cache", + "a-and-g-ocr-post-processed-cache", + "a-and-g-page-images", + "a-and-g-reimagined", + "ai-studio-v2", + "aircare-dev-4002", + "aml-environment-image-build", + "ang-images", + "anywhere-200-files-input", + "anywhere-200-files-output", + "anywhere-input-docs-test", + "anywhere-output-docs-test", + "argo-flow-artifacts", + "atena", + "athena-data", + "autonomize-mlflow-artifacts", + "azure-webjobs-hosts", + "azure-webjobs-secrets", + "azureml", + "azureml-blobstore-3894b54e-0ee2-4e07-9b73-c3b30dc36b53", + "azureml-environments", + "azureml-metrics", + "backup-of-aws-instance", + "bcbs-ma-interqual-medical-policy", + "bcbs-medical-policy", + "benefit-accumulator-guidelines", + "benefit-check-eoc", + "benefit-eoc-guidelines", + "benefit-eoc-output", + "benefit-eoc-payload", + "bluecross-ca", + "carelon-guidelines", + "carelon-guidelines-v2", + "claims-docs", + "claims-qnext-response", + "cllm-v2-data", + "cms", + "correspondance-examples", + "datavant-storage-container", + "eoc-documents", + "etf-images", + "evicore-guidelines", + "fax-documents", + "fax-images", + "fax-images-2", + "fax-insights", + "fax-ocr-cache", + "fax-processor-validation-datasets", + "genesis-container", + "genesis-correspondence-automation-copilot", + "genesis-correspondence-bulk-ingestion-test", + "genesis-cph-demo-v2", + "genesis-dev-test-3012", + "genesis-dev-v2", + "genesis-platform-3010", + "genesis-platform-anywhere-prod", + "genesis-platform-cigna-dev", + "genesis-platform-demo", + "genesis-platform-demo-v2", + "genesis-platform-dev", + "genesis-platform-molina-uat", + "genesis-platform-qa", + "genesis-platform-v2-lab", + "hedis-page-images", + "hedis-page-ocr", + "indexbackup", + "insights-logs-auditevent", + "insights-metrics-pt1m", + "issue-test-1", + "job-test", + "k-hub-container", + "knowledgehubembeddings", + "load-testing-files-hedis", + "mail-images", + "mlflow", + "mlflow-dev-v2", + "mlflow-integration", + "mlflow-mssql", + "mlflow-qa", + "mlflowmssql", + "mlserver-artifacts", + "mlworkspace-backup", + "model-artifacts", + "modelcards", + "modelhub", + "modelhub-demo-v2", + "modelhub-v2-lab", + "models", + "molina-refactor-temporary", + "molina-refactor-test", + "mosaic-models", + "mosaic-provider-contracts", + "mosaic-provider-contracts-temp", + "mosaic-testing", + "ner-container", + "nestedcontainer", + "pcp-docs", + "pcp-extraction-docs", + "pcp-ocr-cache", + "pharmacy-auth", + "prior-auth", + "prior-authorization", + "projectx-files", + "projectx-temp-files", + "revisions", + "shahabas-mlflow-artifacts", + "snapshots", + "snapshotzips", + "spog-output", + "spog-qnext", + "string", + "temp-directory-genesis-studio", + "temp-studio", + "temp-studio-v2", + "temporal-poc", + "test-cms", + "test-storage-container", + "test1", + "tester-fax", + "umk2dev", + "weaviate-backups" ], - "list": true, - "name": "data", + "options_metadata": [], + "placeholder": "", + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "genesis-platform-demo" + }, + "file_name": { + "_input_type": "DropdownInput", + "advanced": false, + "combobox": false, + "dialog_inputs": {}, + "display_name": "File", + "dynamic": false, + "external_options": {}, + "info": "Select a file from the container", + "name": "file_name", + "options": [ + "01_CaseId_MS_001.pdf", + "01_case1_redacted.pdf", + "02_0case.pdf", + "02_KJJ copy 6.pdf", + "02_KJJ copy 7.pdf", + "1056_Subject (1).pdf", + "1900_PriorAuthSample.pdf", + "2001_Subject (1).pdf", + "2001_Subject-1.pdf", + "2001_Subject.pdf", + "28ee05bd-5a48-49c1-ae3b-f81c7d1523bftesttest123.pdf", + "400_PATIENT (1).pdf", + "400_PATIENT.pdf", + "5af97bb72e4e4375829efa0a8f2db776.pdf", + "66f949a74ecda992ccc91ade_66f957aa4ecda992ccc91c09_1.png", + "6_SampleAuthScan.pdf", + "913 Manual Pend Codes - All States Medicaid and Medicare - Job Aid (1).pdf", + "913 Manual Pend Codes - All States Medicaid and Medicare - Job Aid.pdf", + "99736_PermE8_Studies_ALL.pdf", + "99736_PermE8_Studies_ALL123.pdf", + "A Retrospective Study of Clinical Efficacy.pdf", + "AGMT - HSA - BCA of Detroit LLC dba BCA StoneCrest Center (1).pdf", + "Alta Bates 2.pdf", + "Authorization Business Process - All States and LOBs - SOP.pdf", + "BSS-CMS-1500-Fillable-2022-1 2.png", + "BSS-CMS-1500-Fillable-2022-1.png", + "BSS-CMS-1500-Fillable-2022.pdf", + "BSS-CMS-1500-Fillable.png", + "Banner_Square_Code.png", + "CMS 1500_SAMPLE_2024.png", + "CMS 1500_Sample.pdf", + "Case8_PAC.pdf", + "Clotrimazole.pdf", + "Cody Bradshaw Resume.pdf", + "Coordination of Benefits (COB) - All States Medicaid - SOP (1).pdf", + "Coordination of Benefits (COB) - All States Medicaid - SOP (3).pdf", + "Coordination of Benefits (COB) - All States Medicaid - SOP.pdf", + "Corrected Claims and Replacement Bills - Medicare All States- Processing Guideline (1).pdf", + "Corrected Claims and Replacement Bills - Medicare All States- Processing Guideline .pdf", + "Duplicate Claim Processing - Medicare All States - Processing Guideline (1).pdf", + "Formulation PDF example (from FMS System).pdf", + "Formulation attachments PDF2.pdf", + "Gabapentin.pdf", + "PCP 26 JD.pdf", + "PCP 34 JNP.pdf", + "Patient Information HC2 - CBP.pdf", + "Prior Auth Sample 1.pdf", + "PriorAuth000.pdf", + "PriorAuth000_1.pdf", + "PriorAuthSample1_2.pdf", + "PriorAuthSample2 (1).pdf", + "PriorAuthSample2 (2).pdf", + "PriorAuthSample2.pdf", + "PriorAuthSample3 copy.pdf", + "PriorAuthSample3.pdf", + "PriorAuthSample5.pdf", + "Residential Purchase Agreement.pdf", + "Screenshot 2024-10-09 at 13.26.14.png", + "Screenshot 2024-10-10 at 13.23.26.png", + "Screenshot 2024-10-10 at 15.19.00.png", + "Screenshot 2024-10-21 at 12.49.29 PM.png", + "Screenshot 2024-10-21 at 18.05.46.png", + "Standard_Mutual_NDA__2024_.docx.pdf", + "Test1tolstoy (1).pdf", + "UB04 SAMPLE .pdf", + "UB04 SAMPLE .png", + "ZIP5lyout.pdf", + "complaince_summary.pdf", + "discharge_summary.pdf", + "form-1a.pdf", + "only-toc.pdf", + "patient_12783127.pdf", + "patient_12783135-2.pdf", + "patient_summary.pdf", + "prior-auth-doc.pdf", + "source_1729618954942.pdf", + "test2tolstoy (1).pdf" + ], + "options_metadata": [], + "placeholder": "", + "refresh_button": true, + "required": true, + "show": true, + "title_case": false, + "toggle": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", + "value": "discharge_summary.pdf" + }, + "return_all_files": { + "_input_type": "BoolInput", + "advanced": false, + "display_name": "Return All Files", + "dynamic": false, + "info": "If true and no specific file is selected, returns all files in the container", + "list": false, + "list_add_label": "Add More", + "name": "return_all_files", "placeholder": "", "required": false, "show": true, "title_case": false, "tool_mode": false, - "trace_as_input": true, "trace_as_metadata": true, - "type": "other", + "type": "bool", + "value": true + }, + "storage_account": { + "_input_type": "StrInput", + "advanced": true, + "display_name": "Storage Account", + "dynamic": false, + "info": "Storage Account name", + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "name": "storage_account", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "str", "value": "" } }, "tool_mode": false }, "showNode": true, - "type": "RelationExtraction" + "type": "BlobStorage" }, - "dragging": false, - "id": "RelationExtraction-TvyqO", + "id": "BlobStorage-Yg2WN", "measured": { - "height": 216, + "height": 329, "width": 320 }, "position": { - "x": 2133.055996705636, - "y": 130.71021965993916 + "x": 933.359375, + "y": 216.68153235521322 }, "selected": false, "type": "genericNode" } ], "viewport": { - "x": -533.2080951986136, - "y": 226.38671642955805, - "zoom": 0.33885246599046315 + "x": -382.65386957767066, + "y": 91.01608428813608, + "zoom": 0.4539021955799936 } }, "description": "Identifies and extracts relationships between clinical entities, such as treatments, and their dates, from medical documents to support deeper data analysis and knowledge discovery", "endpoint_name": null, - "id": "ad58a538-be66-4a71-88cf-ce0e8a151953", + "id": "34687deb-66f4-436b-9e9f-16b34a3e9d80", "is_component": false, - "last_tested_version": "1.1.1", - "name": "Relation Extraction Agent", + "last_tested_version": "1.6.3", + "name": "Relation Extraction", "tags": [ "chart-review" ] diff --git a/src/backend/base/langflow/utils/starter_projects_utils.py b/src/backend/base/langflow/utils/starter_projects_utils.py index f42ac00579e6..9c994e542261 100644 --- a/src/backend/base/langflow/utils/starter_projects_utils.py +++ b/src/backend/base/langflow/utils/starter_projects_utils.py @@ -47,21 +47,28 @@ def get_starter_projects_json_content(): # Hardcoded list of specific files to include in basic_examples response ALLOWED_BASIC_EXAMPLE_FILES = [ "AskAutoAgent.json", + "Entity Normalization Agent.json", + "Prior Auth Form Extraction Agent.json", + "Relationship Extraction Agent.json", + "CPT Code Agent.json", + "Clinical Entities Extraction.json", + "Auth Guideline.json", + "BenefitCheckAgent.json", + "Clinical Entities Extraction.json", "EOCCheckAgent.json", "EligibilityChecker.json", "AccumulatorCheckAgent.json", "ICD Extractor Agent.json", "IE Criteria Simplification.json", "sumarization agent.json", - "ICD Extractor Agent.json", "Lab Value Extraction.json", "Auth Guideline.json", "AttachDocumentAgent.json", "guideline-retrieval-agent.json", "Document Retrieval Agent.json", "Simple Agent.json", - "AccumulatorCheckAgent.json" + # Add more filenames here as needed diff --git a/src/frontend/src/controllers/API/helpers/constants.ts b/src/frontend/src/controllers/API/helpers/constants.ts index dce035e5490e..5d084a4a3f3d 100644 --- a/src/frontend/src/controllers/API/helpers/constants.ts +++ b/src/frontend/src/controllers/API/helpers/constants.ts @@ -29,7 +29,7 @@ export const URLs = { ALL: `all`, VOICE: `voice`, PUBLIC_FLOW: `flows/public_flow`, - MCP: `mcp`, + MCP: `mcp/project`, MCP_SERVERS: `mcp/servers`, KNOWLEDGE_BASES: `knowledge_bases`, AGENT_MARKETPLACE: `agent-marketplace`, diff --git a/src/frontend/src/controllers/API/queries/mcp/use-add-mcp-server.ts b/src/frontend/src/controllers/API/queries/mcp/use-add-mcp-server.ts index 6fbcbb42f34b..3fe626f8e337 100644 --- a/src/frontend/src/controllers/API/queries/mcp/use-add-mcp-server.ts +++ b/src/frontend/src/controllers/API/queries/mcp/use-add-mcp-server.ts @@ -39,7 +39,7 @@ export const useAddMCPServer: useMutationFunctionType< } const res = await api.post( - `${getURL("MCP_SERVERS", undefined, false)}/${body.name}`, + `${getURL("MCP_SERVERS", undefined, true)}/${body.name}`, payload, ); diff --git a/src/frontend/src/controllers/API/queries/mcp/use-get-mcp-server.ts b/src/frontend/src/controllers/API/queries/mcp/use-get-mcp-server.ts index faf4a5c1b677..4cbcd2ce98cc 100644 --- a/src/frontend/src/controllers/API/queries/mcp/use-get-mcp-server.ts +++ b/src/frontend/src/controllers/API/queries/mcp/use-get-mcp-server.ts @@ -19,7 +19,7 @@ export const useGetMCPServer: useMutationFunctionType< const responseFn = async (params: IGetMCPServer) => { const { data } = await api.get>( - `${getURL("MCP_SERVERS", undefined, false)}/${params.name}`, + `${getURL("MCP_SERVERS", undefined, true)}/${params.name}`, ); return { ...data, name: params.name }; diff --git a/src/frontend/src/controllers/API/queries/mcp/use-get-mcp-servers.ts b/src/frontend/src/controllers/API/queries/mcp/use-get-mcp-servers.ts index 88fff7c2810b..f259b4aa9088 100644 --- a/src/frontend/src/controllers/API/queries/mcp/use-get-mcp-servers.ts +++ b/src/frontend/src/controllers/API/queries/mcp/use-get-mcp-servers.ts @@ -20,7 +20,7 @@ export const useGetMCPServers: useQueryFunctionType< const responseFn = async () => { try { const { data } = await api.get( - `${getURL("MCP_SERVERS", undefined, false)}?action_count=false`, + `${getURL("MCP_SERVERS", undefined, true)}?action_count=false`, ); // Merge with cached data to preserve non-null mode/toolsCount const cachedData = queryClient.getQueryData(["useGetMCPServers"]) as @@ -54,7 +54,7 @@ export const useGetMCPServers: useQueryFunctionType< const fetchWithCounts = async () => { try { const { data } = await api.get( - `${getURL("MCP_SERVERS", undefined, false)}?action_count=true`, + `${getURL("MCP_SERVERS", undefined, true)}?action_count=true`, ); return data; } catch (error) { diff --git a/src/frontend/src/controllers/API/queries/mcp/use-patch-mcp-server.ts b/src/frontend/src/controllers/API/queries/mcp/use-patch-mcp-server.ts index 19ddb8de438b..3dc94c55fb67 100644 --- a/src/frontend/src/controllers/API/queries/mcp/use-patch-mcp-server.ts +++ b/src/frontend/src/controllers/API/queries/mcp/use-patch-mcp-server.ts @@ -40,7 +40,7 @@ export const usePatchMCPServer: useMutationFunctionType< } const res = await api.patch( - `${getURL("MCP_SERVERS", undefined, false)}/${body.name}`, + `${getURL("MCP_SERVERS", undefined, true)}/${body.name}`, payload, );