diff --git a/.github/workflows/typescript_test.yml b/.github/workflows/typescript_test.yml index bb4039042571..580d3fcd2a80 100644 --- a/.github/workflows/typescript_test.yml +++ b/.github/workflows/typescript_test.yml @@ -220,12 +220,12 @@ jobs: echo "Total tests to run: $TEST_COUNT" - # Calculate optimal shard count - 1 shard per 5 tests, min 1, max 10 + # Calculate optimal shard count - 1 shard per 5 tests, min 1, max 40 SHARD_COUNT=$(( (TEST_COUNT + 4) / 5 )) if [ $SHARD_COUNT -lt 1 ]; then SHARD_COUNT=1 - elif [ $SHARD_COUNT -gt 10 ]; then - SHARD_COUNT=10 + elif [ $SHARD_COUNT -gt 40 ]; then + SHARD_COUNT=40 fi # Create the matrix combinations string diff --git a/src/backend/base/langflow/components/processing/split_text.py b/src/backend/base/langflow/components/processing/split_text.py index c07600e62e34..610670bc05a9 100644 --- a/src/backend/base/langflow/components/processing/split_text.py +++ b/src/backend/base/langflow/components/processing/split_text.py @@ -64,8 +64,7 @@ class SplitTextComponent(Component): ] outputs = [ - Output(display_name="Chunks", name="chunks", method="split_text"), - Output(display_name="DataFrame", name="dataframe", method="as_dataframe"), + Output(display_name="Chunks", name="dataframe", method="split_text"), ] def _docs_to_data(self, docs) -> list[Data]: @@ -133,8 +132,5 @@ def split_text_base(self): msg = f"Error splitting text: {e}" raise TypeError(msg) from e - def split_text(self) -> list[Data]: - return self._docs_to_data(self.split_text_base()) - - def as_dataframe(self) -> DataFrame: - return DataFrame(self.split_text()) + def split_text(self) -> DataFrame: + return DataFrame(self._docs_to_data(self.split_text_base())) diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index df2b181f0dc4..34a68c06ad72 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -7,7 +7,7 @@ "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-FzOTA", + "id": "ChatInput-kNQkx", "name": "message", "output_types": [ "Message" @@ -15,7 +15,7 @@ }, "targetHandle": { "fieldName": "question", - "id": "Prompt-kr3Rx", + "id": "Prompt-zHQI0", "inputTypes": [ "Message", "Text" @@ -23,39 +23,12 @@ "type": "str" } }, - "id": "reactflow__edge-ChatInput-FzOTA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-FzOTAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-kr3Rx{œfieldNameœ:œquestionœ,œidœ:œPrompt-kr3Rxœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-ChatInput-kNQkx{œdataTypeœ:œChatInputœ,œidœ:œChatInput-kNQkxœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-zHQI0{œfieldNameœ:œquestionœ,œidœ:œPrompt-zHQI0œ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, - "source": "ChatInput-FzOTA", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-FzOTAœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-kr3Rx", - "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-kr3Rxœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "File", - "id": "File-EO8pn", - "name": "data", - "output_types": [] - }, - "targetHandle": { - "fieldName": "data_inputs", - "id": "SplitText-aHhAi", - "inputTypes": [ - "Data", - "DataFrame" - ], - "type": "other" - } - }, - "id": 
"reactflow__edge-File-CBftc{œdataTypeœ:œFileœ,œidœ:œFile-CBftcœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}-SplitText-gIoap{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-gIoapœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", - "selected": false, - "source": "File-EO8pn", - "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-EO8pnœ, œnameœ: œdataœ, œoutput_typesœ: []}", - "target": "SplitText-aHhAi", - "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-aHhAiœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" + "source": "ChatInput-kNQkx", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-kNQkxœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-zHQI0", + "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-zHQI0œ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -63,7 +36,7 @@ "data": { "sourceHandle": { "dataType": "Prompt", - "id": "Prompt-kr3Rx", + "id": "Prompt-zHQI0", "name": "prompt", "output_types": [ "Message" @@ -71,19 +44,19 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "OpenAIModel-7W8gE", + "id": "OpenAIModel-9bWp2", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-Prompt-kr3Rx{œdataTypeœ:œPromptœ,œidœ:œPrompt-kr3Rxœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OpenAIModel-Ej17f{œfieldNameœ:œinput_valueœ,œidœ:œOpenAIModel-Ej17fœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-Prompt-zHQI0{œdataTypeœ:œPromptœ,œidœ:œPrompt-zHQI0œ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OpenAIModel-9bWp2{œfieldNameœ:œinput_valueœ,œidœ:œOpenAIModel-9bWp2œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "Prompt-kr3Rx", - "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-kr3Rxœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", - "target": "OpenAIModel-Ej17f", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œOpenAIModel-7W8gEœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "Prompt-zHQI0", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-zHQI0œ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", + "target": "OpenAIModel-9bWp2", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œOpenAIModel-9bWp2œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -91,7 +64,7 @@ "data": { "sourceHandle": { "dataType": "OpenAIModel", - "id": "OpenAIModel-Ej17f", + "id": "OpenAIModel-9bWp2", "name": "text_output", "output_types": [ "Message" @@ -99,7 +72,7 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "ChatOutput-mbLiD", + "id": "ChatOutput-GAFHg", "inputTypes": [ "Data", "DataFrame", @@ -108,12 +81,12 @@ "type": "str" } }, - "id": "reactflow__edge-OpenAIModel-Ej17f{œdataTypeœ:œOpenAIModelœ,œidœ:œOpenAIModel-Ej17fœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-nGc6Z{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-nGc6Zœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-OpenAIModel-9bWp2{œdataTypeœ:œOpenAIModelœ,œidœ:œOpenAIModel-9bWp2œ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-GAFHg{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-GAFHgœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "OpenAIModel-Ej17f", - "sourceHandle": "{œdataTypeœ: œOpenAIModelœ, œidœ: œOpenAIModel-Ej17fœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", - "target": "ChatOutput-nGc6Z", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-mbLiDœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}" + "source": 
"OpenAIModel-9bWp2", + "sourceHandle": "{œdataTypeœ: œOpenAIModelœ, œidœ: œOpenAIModel-9bWp2œ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", + "target": "ChatOutput-GAFHg", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-GAFHgœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -121,7 +94,7 @@ "data": { "sourceHandle": { "dataType": "parser", - "id": "parser-YIJGN", + "id": "parser-Qet8H", "name": "parsed_text", "output_types": [ "Message" @@ -129,7 +102,7 @@ }, "targetHandle": { "fieldName": "context", - "id": "Prompt-dcKE8", + "id": "Prompt-zHQI0", "inputTypes": [ "Message", "Text" @@ -137,41 +110,12 @@ "type": "str" } }, - "id": "reactflow__edge-parser-YIJGN{œdataTypeœ:œparserœ,œidœ:œparser-YIJGNœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-kr3Rx{œfieldNameœ:œcontextœ,œidœ:œPrompt-kr3Rxœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-parser-Qet8H{œdataTypeœ:œparserœ,œidœ:œparser-Qet8Hœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-zHQI0{œfieldNameœ:œcontextœ,œidœ:œPrompt-zHQI0œ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, - "source": "parser-YIJGN", - "sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-YIJGNœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-kr3Rx", - "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-dcKE8œ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "SplitText", - "id": "SplitText-aHhAi", - "name": "chunks", - "output_types": [ - "Data" - ] - }, - "targetHandle": { - "fieldName": "ingest_data", - "id": "AstraDB-xD6ep", - "inputTypes": [ - "Data", - "DataFrame" - ], - "type": "other" - } - }, - "id": "reactflow__edge-SplitText-aHhAi{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-aHhAiœ,œnameœ:œchunksœ,œoutput_typesœ:[œDataœ]}-AstraDB-lXzoG{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-lXzoGœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", - "selected": false, - "source": "SplitText-aHhAi", - "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-aHhAiœ, œnameœ: œchunksœ, œoutput_typesœ: [œDataœ]}", - "target": "AstraDB-lXzoG", - "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-xD6epœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" + "source": "parser-Qet8H", + "sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-Qet8Hœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-zHQI0", + "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-zHQI0œ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -179,7 +123,7 @@ "data": { "sourceHandle": { "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-tSZ8A", + "id": "OpenAIEmbeddings-D1jSt", "name": "embeddings", "output_types": [ "Embeddings" @@ -187,19 +131,19 @@ }, "targetHandle": { "fieldName": "embedding_model", - "id": "AstraDB-xD6ep", + "id": "AstraDB-eQaxM", "inputTypes": [ "Embeddings" ], "type": "other" } }, - "id": "reactflow__edge-OpenAIEmbeddings-tSZ8A{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-tSZ8Aœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-lXzoG{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-lXzoGœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "id": 
"reactflow__edge-OpenAIEmbeddings-D1jSt{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-D1jStœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-eQaxM{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-eQaxMœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", "selected": false, - "source": "OpenAIEmbeddings-tSZ8A", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-tSZ8Aœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-lXzoG", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-xD6epœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" + "source": "OpenAIEmbeddings-D1jSt", + "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-D1jStœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", + "target": "AstraDB-eQaxM", + "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-eQaxMœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -207,7 +151,7 @@ "data": { "sourceHandle": { "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-M2xTe", + "id": "OpenAIEmbeddings-4Uky4", "name": "embeddings", "output_types": [ "Embeddings" @@ -215,19 +159,19 @@ }, "targetHandle": { "fieldName": "embedding_model", - "id": "AstraDB-PTTd1", + "id": "AstraDB-tVkFw", "inputTypes": [ "Embeddings" ], "type": "other" } }, - "id": "reactflow__edge-OpenAIEmbeddings-M2xTe{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-M2xTeœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-BRnBB{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-BRnBBœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-OpenAIEmbeddings-4Uky4{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-4Uky4œ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-tVkFw{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-tVkFwœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", "selected": false, - "source": "OpenAIEmbeddings-M2xTe", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-M2xTeœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-BRnBB", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-PTTd1œ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" + "source": "OpenAIEmbeddings-4Uky4", + "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-4Uky4œ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", + "target": "AstraDB-tVkFw", + "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-tVkFwœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -235,7 +179,7 @@ "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-FzOTA", + "id": "ChatInput-kNQkx", "name": "message", "output_types": [ "Message" @@ -243,19 +187,19 @@ }, "targetHandle": { "fieldName": "search_query", - "id": "AstraDB-PTTd1", + "id": "AstraDB-tVkFw", "inputTypes": [ "Message" ], "type": "query" } }, - "id": "reactflow__edge-ChatInput-FzOTA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-FzOTAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-BRnBB{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-BRnBBœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", + "id": "reactflow__edge-ChatInput-kNQkx{œdataTypeœ:œChatInputœ,œidœ:œChatInput-kNQkxœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-tVkFw{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-tVkFwœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", "selected": false, - "source": "ChatInput-FzOTA", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-FzOTAœ, œnameœ: œmessageœ, 
œoutput_typesœ: [œMessageœ]}", - "target": "AstraDB-BRnBB", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-PTTd1œ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}" + "source": "ChatInput-kNQkx", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-kNQkxœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", + "target": "AstraDB-tVkFw", + "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-tVkFwœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}" }, { "animated": false, @@ -263,7 +207,7 @@ "data": { "sourceHandle": { "dataType": "AstraDB", - "id": "AstraDB-BRnBB", + "id": "AstraDB-tVkFw", "name": "dataframe", "output_types": [ "DataFrame" @@ -271,7 +215,7 @@ }, "targetHandle": { "fieldName": "input_data", - "id": "parser-l9sAS", + "id": "parser-Qet8H", "inputTypes": [ "DataFrame", "Data" @@ -279,12 +223,12 @@ "type": "other" } }, - "id": "reactflow__edge-AstraDB-BRnBB{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-BRnBBœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-parser-YIJGN{œfieldNameœ:œinput_dataœ,œidœ:œparser-YIJGNœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-AstraDB-tVkFw{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-tVkFwœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-parser-Qet8H{œfieldNameœ:œinput_dataœ,œidœ:œparser-Qet8Hœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "selected": false, - "source": "AstraDB-BRnBB", - "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-BRnBBœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", - "target": "parser-YIJGN", - "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œparser-l9sASœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" + "source": "AstraDB-tVkFw", + "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-tVkFwœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", + "target": "parser-Qet8H", + "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œparser-Qet8Hœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -292,7 +236,7 @@ "data": { "sourceHandle": { "dataType": "File", - "id": "File-EO8pn", + "id": "File-kPRpn", "name": "dataframe", "output_types": [ "DataFrame" @@ -300,7 +244,7 @@ }, "targetHandle": { "fieldName": "data_inputs", - "id": "SplitText-aHhAi", + "id": "SplitText-sDxql", "inputTypes": [ "Data", "DataFrame" @@ -308,12 +252,38 @@ "type": "other" } }, - "id": "xy-edge__File-EO8pn{œdataTypeœ:œFileœ,œidœ:œFile-EO8pnœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-SplitText-aHhAi{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-aHhAiœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-File-kPRpn{œdataTypeœ:œFileœ,œidœ:œFile-kPRpnœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-SplitText-sDxql{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-sDxqlœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", "selected": false, - "source": "File-EO8pn", - "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-EO8pnœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", - "target": "SplitText-aHhAi", - "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-aHhAiœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" + "source": "File-kPRpn", + "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-kPRpnœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", + "target": "SplitText-sDxql", + "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-sDxqlœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" + }, + { + "data": { + "sourceHandle": { + "dataType": "SplitText", + "id": "SplitText-sDxql", + "name": 
"dataframe", + "output_types": [ + "DataFrame" + ] + }, + "targetHandle": { + "fieldName": "ingest_data", + "id": "AstraDB-eQaxM", + "inputTypes": [ + "Data", + "DataFrame" + ], + "type": "other" + } + }, + "id": "xy-edge__SplitText-sDxql{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-sDxqlœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-AstraDB-eQaxM{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-eQaxMœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", + "source": "SplitText-sDxql", + "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-sDxqlœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", + "target": "AstraDB-eQaxM", + "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-eQaxMœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" } ], "nodes": [ @@ -321,7 +291,7 @@ "data": { "description": "Get chat inputs from the Playground.", "display_name": "Chat Input", - "id": "ChatInput-FzOTA", + "id": "ChatInput-kNQkx", "node": { "base_classes": [ "Message" @@ -587,7 +557,7 @@ }, "dragging": false, "height": 234, - "id": "ChatInput-FzOTA", + "id": "ChatInput-kNQkx", "measured": { "height": 234, "width": 320 @@ -608,7 +578,7 @@ "data": { "description": "Create a prompt template with dynamic variables.", "display_name": "Prompt", - "id": "Prompt-kr3Rx", + "id": "Prompt-zHQI0", "node": { "base_classes": [ "Message" @@ -768,7 +738,7 @@ }, "dragging": false, "height": 433, - "id": "Prompt-kr3Rx", + "id": "Prompt-zHQI0", "measured": { "height": 433, "width": 320 @@ -789,7 +759,7 @@ "data": { "description": "Split text into chunks based on specified criteria.", "display_name": "Split Text", - "id": "SplitText-aHhAi", + "id": "SplitText-sDxql", "node": { "base_classes": [ "Data" @@ -820,20 +790,6 @@ "display_name": "Chunks", "group_outputs": false, "method": "split_text", - "name": "chunks", - "selected": "Data", - "tool_mode": true, - "types": [ - "Data" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "DataFrame", - "group_outputs": false, - "method": "as_dataframe", "name": "dataframe", "selected": "DataFrame", "tool_mode": true, @@ -892,7 +848,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data or DataFrame\",\n info=\"The data with texts to split in chunks.\",\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=(\n \"The maximum length of each chunk. Text is first split by separator, \"\n \"then chunks are merged up to this size. \"\n \"Individual splits larger than this won't be further divided.\"\n ),\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=(\n \"The character to split on. Use \\\\n for newline. 
\"\n \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . for sentences\"\n ),\n value=\"\\n\",\n ),\n MessageTextInput(\n name=\"text_key\",\n display_name=\"Text Key\",\n info=\"The key to use for the text column.\",\n value=\"text\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keep_separator\",\n display_name=\"Keep Separator\",\n info=\"Whether to keep the separator in the output chunks and where to place it.\",\n options=[\"False\", \"True\", \"Start\", \"End\"],\n value=\"False\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"chunks\", method=\"split_text\"),\n Output(display_name=\"DataFrame\", name=\"dataframe\", method=\"as_dataframe\"),\n ]\n\n def _docs_to_data(self, docs) -> list[Data]:\n return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n\n def _fix_separator(self, separator: str) -> str:\n \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n if separator == \"/n\":\n return \"\\n\"\n if separator == \"/t\":\n return \"\\t\"\n return separator\n\n def split_text_base(self):\n separator = self._fix_separator(self.separator)\n separator = unescape_string(separator)\n\n if isinstance(self.data_inputs, DataFrame):\n if not len(self.data_inputs):\n msg = \"DataFrame is empty\"\n raise TypeError(msg)\n\n self.data_inputs.text_key = self.text_key\n try:\n documents = self.data_inputs.to_lc_documents()\n except Exception as e:\n msg = f\"Error converting DataFrame to documents: {e}\"\n raise TypeError(msg) from e\n else:\n if not self.data_inputs:\n msg = \"No data inputs provided\"\n raise TypeError(msg)\n\n documents = []\n if isinstance(self.data_inputs, Data):\n self.data_inputs.text_key = self.text_key\n documents = [self.data_inputs.to_lc_document()]\n else:\n try:\n documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n if not documents:\n msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n raise TypeError(msg)\n except AttributeError as e:\n msg = f\"Invalid input type in collection: {e}\"\n raise TypeError(msg) from e\n try:\n # Convert string 'False'/'True' to boolean\n keep_sep = self.keep_separator\n if isinstance(keep_sep, str):\n if keep_sep.lower() == \"false\":\n keep_sep = False\n elif keep_sep.lower() == \"true\":\n keep_sep = True\n # 'start' and 'end' are kept as strings\n\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n keep_separator=keep_sep,\n )\n return splitter.split_documents(documents)\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n def split_text(self) -> list[Data]:\n return self._docs_to_data(self.split_text_base())\n\n def as_dataframe(self) -> DataFrame:\n return DataFrame(self.split_text())\n" + "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data or DataFrame\",\n info=\"The data with texts to 
split in chunks.\",\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=(\n \"The maximum length of each chunk. Text is first split by separator, \"\n \"then chunks are merged up to this size. \"\n \"Individual splits larger than this won't be further divided.\"\n ),\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=(\n \"The character to split on. Use \\\\n for newline. \"\n \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . for sentences\"\n ),\n value=\"\\n\",\n ),\n MessageTextInput(\n name=\"text_key\",\n display_name=\"Text Key\",\n info=\"The key to use for the text column.\",\n value=\"text\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keep_separator\",\n display_name=\"Keep Separator\",\n info=\"Whether to keep the separator in the output chunks and where to place it.\",\n options=[\"False\", \"True\", \"Start\", \"End\"],\n value=\"False\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"dataframe\", method=\"split_text\"),\n ]\n\n def _docs_to_data(self, docs) -> list[Data]:\n return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n\n def _fix_separator(self, separator: str) -> str:\n \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n if separator == \"/n\":\n return \"\\n\"\n if separator == \"/t\":\n return \"\\t\"\n return separator\n\n def split_text_base(self):\n separator = self._fix_separator(self.separator)\n separator = unescape_string(separator)\n\n if isinstance(self.data_inputs, DataFrame):\n if not len(self.data_inputs):\n msg = \"DataFrame is empty\"\n raise TypeError(msg)\n\n self.data_inputs.text_key = self.text_key\n try:\n documents = self.data_inputs.to_lc_documents()\n except Exception as e:\n msg = f\"Error converting DataFrame to documents: {e}\"\n raise TypeError(msg) from e\n else:\n if not self.data_inputs:\n msg = \"No data inputs provided\"\n raise TypeError(msg)\n\n documents = []\n if isinstance(self.data_inputs, Data):\n self.data_inputs.text_key = self.text_key\n documents = [self.data_inputs.to_lc_document()]\n else:\n try:\n documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n if not documents:\n msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n raise TypeError(msg)\n except AttributeError as e:\n msg = f\"Invalid input type in collection: {e}\"\n raise TypeError(msg) from e\n try:\n # Convert string 'False'/'True' to boolean\n keep_sep = self.keep_separator\n if isinstance(keep_sep, str):\n if keep_sep.lower() == \"false\":\n keep_sep = False\n elif keep_sep.lower() == \"true\":\n keep_sep = True\n # 'start' and 'end' are kept as strings\n\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n keep_separator=keep_sep,\n )\n return splitter.split_documents(documents)\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n def split_text(self) -> DataFrame:\n return DataFrame(self._docs_to_data(self.split_text_base()))\n" }, "data_inputs": { "advanced": false, @@ -987,7 +943,7 @@ }, "dragging": false, "height": 475, - "id": "SplitText-aHhAi", + "id": "SplitText-sDxql", "measured": { "height": 475, "width": 320 @@ 
-1006,7 +962,7 @@ }, { "data": { - "id": "note-eJrcq", + "id": "note-VWeXf", "node": { "description": "## 🐕 2. Retriever Flow\n\nThis flow answers your questions with contextual data retrieved from your vector database.\n\nOpen the **Playground** and ask, \n\n```\nWhat is this document about?\n```\n", "display_name": "", @@ -1019,7 +975,7 @@ }, "dragging": false, "height": 324, - "id": "note-eJrcq", + "id": "note-VWeXf", "measured": { "height": 324, "width": 325 @@ -1043,7 +999,7 @@ }, { "data": { - "id": "note-oUrKA", + "id": "note-K46GL", "node": { "description": "## 📖 README\n\nLoad your data into a vector database with the 📚 **Load Data** flow, and then use your data as chat context with the 🐕 **Retriever** flow.\n\n**🚨 Add your OpenAI API key as a global variable to easily add it to all of the OpenAI components in this flow.** \n\n**Quick start**\n1. Run the 📚 **Load Data** flow.\n2. Run the 🐕 **Retriever** flow.\n\n**Next steps** \n\n- Experiment by changing the prompt and the loaded data to see how the bot's responses change. \n\nFor more info, see the [Langflow docs](https://docs.langflow.org/starter-projects-vector-store-rag).", "display_name": "Read Me", @@ -1056,7 +1012,7 @@ }, "dragging": false, "height": 324, - "id": "note-oUrKA", + "id": "note-K46GL", "measured": { "height": 324, "width": 325 @@ -1082,7 +1038,7 @@ "data": { "description": "Display a chat message in the Playground.", "display_name": "Chat Output", - "id": "ChatOutput-nGc6Z", + "id": "ChatOutput-GAFHg", "node": { "base_classes": [ "Message" @@ -1365,7 +1321,7 @@ }, "dragging": false, "height": 234, - "id": "ChatOutput-nGc6Z", + "id": "ChatOutput-GAFHg", "measured": { "height": 234, "width": 320 @@ -1384,7 +1340,7 @@ }, { "data": { - "id": "OpenAIEmbeddings-M2xTe", + "id": "OpenAIEmbeddings-4Uky4", "node": { "base_classes": [ "Embeddings" @@ -1680,7 +1636,7 @@ "show": true, "title_case": false, "type": "str", - "value": "OPENAI_API_KEY" + "value": "" }, "openai_api_type": { "_input_type": "MessageTextInput", @@ -1863,7 +1819,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-M2xTe", + "id": "OpenAIEmbeddings-4Uky4", "measured": { "height": 320, "width": 320 @@ -1876,13 +1832,13 @@ "x": 825.435626932521, "y": 739.6327999745448 }, - "selected": true, + "selected": false, "type": "genericNode", "width": 320 }, { "data": { - "id": "note-cYKfJ", + "id": "note-Jk7TI", "node": { "description": "## 📚 1. Load Data Flow\n\nRun this first! Load data from a local file and embed it into the vector database.\n\nSelect a Database and a Collection, or create new ones. 
\n\nClick ▶️ **Run component** on the **Astra DB** component to load your data.\n\n* If you're using OSS Langflow, add your Astra DB Application Token to the Astra DB component.\n\n#### Next steps:\n Experiment by changing the prompt and the contextual data to see how the retrieval flow's responses change.", "display_name": "", @@ -1895,7 +1851,7 @@ }, "dragging": false, "height": 324, - "id": "note-cYKfJ", + "id": "note-Jk7TI", "measured": { "height": 324, "width": 325 @@ -1919,7 +1875,7 @@ }, { "data": { - "id": "OpenAIEmbeddings-tSZ8A", + "id": "OpenAIEmbeddings-D1jSt", "node": { "base_classes": [ "Embeddings" @@ -2215,7 +2171,7 @@ "show": true, "title_case": false, "type": "str", - "value": "OPENAI_API_KEY" + "value": "" }, "openai_api_type": { "_input_type": "MessageTextInput", @@ -2398,7 +2354,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-tSZ8A", + "id": "OpenAIEmbeddings-D1jSt", "measured": { "height": 320, "width": 320 @@ -2417,7 +2373,7 @@ }, { "data": { - "id": "File-EO8pn", + "id": "File-kPRpn", "node": { "base_classes": [ "Data" @@ -2664,7 +2620,7 @@ }, "dragging": false, "height": 367, - "id": "File-EO8pn", + "id": "File-kPRpn", "measured": { "height": 367, "width": 320 @@ -2683,7 +2639,7 @@ }, { "data": { - "id": "note-NsKYL", + "id": "note-mQAwf", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2696,7 +2652,7 @@ }, "dragging": false, "height": 324, - "id": "note-NsKYL", + "id": "note-mQAwf", "measured": { "height": 324, "width": 324 @@ -2715,7 +2671,7 @@ }, { "data": { - "id": "note-By1Lm", + "id": "note-6Bw7F", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2728,7 +2684,7 @@ }, "dragging": false, "height": 324, - "id": "note-By1Lm", + "id": "note-6Bw7F", "measured": { "height": 324, "width": 324 @@ -2747,7 +2703,7 @@ }, { "data": { - "id": "note-iSzAZ", + "id": "note-dVn2E", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2760,7 +2716,7 @@ }, "dragging": false, "height": 324, - "id": "note-iSzAZ", + "id": "note-dVn2E", "measured": { "height": 324, "width": 324 @@ -2779,7 +2735,7 @@ }, { "data": { - "id": "OpenAIModel-Ej17f", + "id": "OpenAIModel-9bWp2", "node": { "base_classes": [ "LanguageModel", @@ -3158,9 +3114,9 @@ "type": "OpenAIModel" }, "dragging": false, - "id": "OpenAIModel-Ej17f", + "id": "OpenAIModel-9bWp2", "measured": { - "height": 614, + "height": 540, "width": 320 }, "position": { @@ -3172,7 +3128,7 @@ }, { "data": { - "id": "parser-YIJGN", + "id": "parser-Qet8H", "node": { "base_classes": [ "Message" @@ -3333,9 +3289,9 @@ "type": "parser" }, "dragging": false, - "id": "parser-YIJGN", + "id": "parser-Qet8H", "measured": { - "height": 395, + "height": 361, "width": 320 }, "position": { @@ -3347,7 +3303,7 @@ }, { "data": { - "id": "AstraDB-BRnBB", + "id": "AstraDB-tVkFw", "node": { "base_classes": [ "Data", @@ -3481,7 +3437,7 @@ "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "" + "value": "ASTRA_DB_API_ENDPOINT" }, "astradb_vectorstore_kwargs": { "_input_type": "NestedDictInput", @@ -3722,7 +3678,11 @@ "dynamic": false, "info": "Cloud provider for the new database.", "name": "cloud_provider", - "options": [], + "options": [ + "Amazon Web Services", + "Google Cloud Platform", + "Microsoft Azure" + ], "options_metadata": [], "placeholder": "", "real_time_refresh": true, @@ -3765,12 +3725,22 @@ "info": "The Database name for the Astra DB instance.", "name": "database_name", "options": [], - 
"options_metadata": [], + "options_metadata": [ + { + "api_endpoint": "https://deb10a81-3c5d-4fd3-8b1b-945915d2835b-us-east-2.apps.astra.datastax.com", + "collections": 1, + "keyspaces": [ + "default_keyspace" + ], + "org_id": "4bd8a5f9-41b3-4d8a-b039-0dd35f5eb374", + "status": null + } + ], "placeholder": "", "real_time_refresh": true, "refresh_button": true, "required": true, - "show": false, + "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, @@ -4095,9 +4065,9 @@ "type": "AstraDB" }, "dragging": false, - "id": "AstraDB-BRnBB", + "id": "AstraDB-tVkFw", "measured": { - "height": 449, + "height": 458, "width": 320 }, "position": { @@ -4109,7 +4079,7 @@ }, { "data": { - "id": "AstraDB-lXzoG", + "id": "AstraDB-eQaxM", "node": { "base_classes": [ "Data", @@ -4531,12 +4501,22 @@ "info": "The Database name for the Astra DB instance.", "name": "database_name", "options": [], - "options_metadata": [], + "options_metadata": [ + { + "api_endpoint": "https://deb10a81-3c5d-4fd3-8b1b-945915d2835b-us-east-2.apps.astra.datastax.com", + "collections": 1, + "keyspaces": [ + "default_keyspace" + ], + "org_id": "4bd8a5f9-41b3-4d8a-b039-0dd35f5eb374", + "status": null + } + ], "placeholder": "", "real_time_refresh": true, "refresh_button": true, "required": true, - "show": false, + "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, @@ -4852,7 +4832,7 @@ "show": true, "title_case": false, "type": "str", - "value": "ASTRA_DB_APPLICATION_TOKEN" + "value": "" } }, "tool_mode": false @@ -4861,9 +4841,9 @@ "type": "AstraDB" }, "dragging": false, - "id": "AstraDB-lXzoG", + "id": "AstraDB-eQaxM", "measured": { - "height": 449, + "height": 458, "width": 320 }, "position": { @@ -4875,9 +4855,9 @@ } ], "viewport": { - "x": 90.57560089396452, - "y": -149.7037806007536, - "zoom": 0.46276403161264995 + "x": 20.50191698112849, + "y": -144.65436276592914, + "zoom": 0.43295751491830675 } }, "description": "Load your data for chat context with Retrieval Augmented Generation.", diff --git a/src/backend/tests/unit/components/processing/test_split_text_component.py b/src/backend/tests/unit/components/processing/test_split_text_component.py index 3d3a0de8946d..40e66ed62a37 100644 --- a/src/backend/tests/unit/components/processing/test_split_text_component.py +++ b/src/backend/tests/unit/components/processing/test_split_text_component.py @@ -38,7 +38,7 @@ def file_names_mapping(self): def test_split_text_basic(self): """Test basic text splitting functionality.""" component = SplitTextComponent() - test_text = "This is a test.\nIt has multiple lines.\nEach line should be a chunk." 
+ test_text = "First chunk\nSecond chunk\nThird chunk" component.set_attributes( { "data_inputs": [Data(text=test_text)], @@ -52,12 +52,18 @@ def test_split_text_basic(self): } ) - results = component.split_text() - assert len(results) == 3, f"Expected 3 chunks, got {len(results)}" - assert "This is a test" in results[0].text, f"Expected 'This is a test', got '{results[0].text}'" - assert "It has multiple lines" in results[1].text, f"Expected 'It has multiple lines', got '{results[1].text}'" - assert "Each line should be a chunk" in results[2].text, ( - f"Expected 'Each line should be a chunk', got '{results[2].text}'" + data_frame = component.split_text() + assert isinstance(data_frame, DataFrame), "Expected DataFrame instance" + assert len(data_frame) == 3, f"Expected DataFrame with 3 rows, got {len(data_frame)}" + assert list(data_frame.columns) == ["text"], f"Expected columns ['text'], got {list(data_frame.columns)}" + assert "First chunk" in data_frame.iloc[0]["text"], ( + f"Expected 'First chunk', got '{data_frame.iloc[0]['text']}'" + ) + assert "Second chunk" in data_frame.iloc[1]["text"], ( + f"Expected 'Second chunk', got '{data_frame.iloc[1]['text']}'" + ) + assert "Third chunk" in data_frame.iloc[2]["text"], ( + f"Expected 'Third chunk', got '{data_frame.iloc[2]['text']}'" ) def test_split_text_with_overlap(self): @@ -76,17 +82,24 @@ def test_split_text_with_overlap(self): } ) - results = component.split_text() - assert len(results) > 1, f"Expected more than 1 chunk, got {len(results)}" - # Check that chunks contain the expected text - assert "First chunk" in results[0].text, f"Expected 'First chunk' in '{results[0].text}'" - assert "Second chunk" in results[1].text, f"Expected 'Second chunk' in '{results[1].text}'" - assert "Third chunk" in results[2].text, f"Expected 'Third chunk' in '{results[2].text}'" + data_frame = component.split_text() + assert isinstance(data_frame, DataFrame), "Expected DataFrame instance" + assert len(data_frame) == 3, f"Expected DataFrame with 3 rows, got {len(data_frame)}" + assert list(data_frame.columns) == ["text"], f"Expected columns ['text'], got {list(data_frame.columns)}" + assert "First chunk" in data_frame.iloc[0]["text"], ( + f"Expected 'First chunk', got '{data_frame.iloc[0]['text']}'" + ) + assert "Second chunk" in data_frame.iloc[1]["text"], ( + f"Expected 'Second chunk', got '{data_frame.iloc[1]['text']}'" + ) + assert "Third chunk" in data_frame.iloc[2]["text"], ( + f"Expected 'Third chunk', got '{data_frame.iloc[2]['text']}'" + ) def test_split_text_custom_separator(self): """Test text splitting with a custom separator.""" component = SplitTextComponent() - test_text = "First part|Second part|Third part" + test_text = "First chunk.|Second chunk.|Third chunk." 
component.set_attributes( { "data_inputs": [Data(text=test_text)], @@ -99,17 +112,25 @@ def test_split_text_custom_separator(self): } ) - results = component.split_text() - assert len(results) == 3, f"Expected 3 chunks, got {len(results)}" - assert "First part" in results[0].text, f"Expected 'First part', got '{results[0].text}'" - assert "Second part" in results[1].text, f"Expected 'Second part', got '{results[1].text}'" - assert "Third part" in results[2].text, f"Expected 'Third part', got '{results[2].text}'" + data_frame = component.split_text() + assert isinstance(data_frame, DataFrame), "Expected DataFrame instance" + assert len(data_frame) == 3, f"Expected DataFrame with 3 rows, got {len(data_frame)}" + assert list(data_frame.columns) == ["text"], f"Expected columns ['text'], got {list(data_frame.columns)}" + assert "First chunk" in data_frame.iloc[0]["text"], ( + f"Expected 'First chunk', got '{data_frame.iloc[0]['text']}'" + ) + assert "Second chunk" in data_frame.iloc[1]["text"], ( + f"Expected 'Second chunk', got '{data_frame.iloc[1]['text']}'" + ) + assert "Third chunk" in data_frame.iloc[2]["text"], ( + f"Expected 'Third chunk', got '{data_frame.iloc[2]['text']}'" + ) def test_split_text_with_metadata(self): """Test text splitting while preserving metadata.""" component = SplitTextComponent() test_metadata = {"source": "test.txt", "author": "test"} - test_text = "Chunk 1\nChunk 2" + test_text = "First chunk\nSecond chunk" component.set_attributes( { "data_inputs": [Data(text=test_text, data=test_metadata)], @@ -122,45 +143,23 @@ def test_split_text_with_metadata(self): } ) - results = component.split_text() - assert len(results) == 2, f"Expected 2 chunks, got {len(results)}" - for result in results: - assert result.data["source"] == test_metadata["source"], ( - f"Expected source '{test_metadata['source']}', got '{result.data.get('source')}'" - ) - assert result.data["author"] == test_metadata["author"], ( - f"Expected author '{test_metadata['author']}', got '{result.data.get('author')}'" - ) - - def test_split_text_as_dataframe(self): - """Test converting split text results to DataFrame.""" - component = SplitTextComponent() - test_text = "First chunk\nSecond chunk\nThird chunk" - component.set_attributes( - { - "data_inputs": [Data(text=test_text)], - "chunk_overlap": 0, - "chunk_size": 11, - "separator": "\n", - "session_id": "test_session", - "sender": "test_sender", - "sender_name": "test_sender_name", - } - ) - - data_frame = component.as_dataframe() + data_frame = component.split_text() assert isinstance(data_frame, DataFrame), "Expected DataFrame instance" - assert len(data_frame) == 3, f"Expected DataFrame with 3 rows, got {len(data_frame)}" - assert list(data_frame.columns) == ["text"], f"Expected columns ['text'], got {list(data_frame.columns)}" + assert len(data_frame) == 2, f"Expected DataFrame with 2 rows, got {len(data_frame)}" assert "First chunk" in data_frame.iloc[0]["text"], ( f"Expected 'First chunk', got '{data_frame.iloc[0]['text']}'" ) assert "Second chunk" in data_frame.iloc[1]["text"], ( f"Expected 'Second chunk', got '{data_frame.iloc[1]['text']}'" ) - assert "Third chunk" in data_frame.iloc[2]["text"], ( - f"Expected 'Third chunk', got '{data_frame.iloc[2]['text']}'" - ) + # Loop over each row to check metadata + for _, row in data_frame.iterrows(): + assert row["source"] == test_metadata["source"], ( + f"Expected source '{test_metadata['source']}', got '{row['source']}'" + ) + assert row["author"] == test_metadata["author"], ( + f"Expected author 
'{test_metadata['author']}', got '{row['author']}'" + ) def test_split_text_empty_input(self): """Test handling of empty input text.""" @@ -198,7 +197,7 @@ def test_split_text_single_chunk(self): results = component.split_text() assert len(results) == 1, f"Expected 1 chunk, got {len(results)}" - assert results[0].text == test_text, f"Expected '{test_text}', got '{results[0].text}'" + assert results["text"][0] == test_text, f"Expected '{test_text}', got '{results['text'][0]}'" def test_split_text_multiple_inputs(self): """Test splitting multiple input texts.""" @@ -218,10 +217,10 @@ def test_split_text_multiple_inputs(self): results = component.split_text() assert len(results) == 4, f"Expected 4 chunks (2 from each text), got {len(results)}" - assert "First text" in results[0].text, f"Expected 'First text', got '{results[0].text}'" - assert "Second line" in results[1].text, f"Expected 'Second line', got '{results[1].text}'" - assert "Another text" in results[2].text, f"Expected 'Another text', got '{results[2].text}'" - assert "Another line" in results[3].text, f"Expected 'Another line', got '{results[3].text}'" + assert "First text" in results["text"][0], f"Expected 'First text', got '{results['text'][0]}'" + assert "Second line" in results["text"][1], f"Expected 'Second line', got '{results['text'][1]}'" + assert "Another text" in results["text"][2], f"Expected 'Another text', got '{results['text'][2]}'" + assert "Another line" in results["text"][3], f"Expected 'Another line', got '{results['text'][3]}'" def test_split_text_with_dataframe_input(self): """Test splitting text with DataFrame input.""" @@ -242,10 +241,10 @@ def test_split_text_with_dataframe_input(self): results = component.split_text() assert len(results) == 4, f"Expected 4 chunks (2 from each text), got {len(results)}" - assert "First text" in results[0].text, f"Expected 'First text', got '{results[0].text}'" - assert "Second line" in results[1].text, f"Expected 'Second line', got '{results[1].text}'" - assert "Another text" in results[2].text, f"Expected 'Another text', got '{results[2].text}'" - assert "Another line" in results[3].text, f"Expected 'Another line', got '{results[3].text}'" + assert "First text" in results["text"][0], f"Expected 'First text', got '{results['text'][0]}'" + assert "Second line" in results["text"][1], f"Expected 'Second line', got '{results['text'][1]}'" + assert "Another text" in results["text"][2], f"Expected 'Another text', got '{results['text'][2]}'" + assert "Another line" in results["text"][3], f"Expected 'Another line', got '{results['text'][3]}'" def test_with_url_loader(self): """Test splitting text with URL loader.""" @@ -267,5 +266,5 @@ def test_with_url_loader(self): ) results = component.split_text() - assert isinstance(results, list), "Expected list instance" + assert isinstance(results, DataFrame), "Expected DataFrame instance" assert len(results) > 2, f"Expected DataFrame with more than 2 rows, got {len(results)}" diff --git a/src/frontend/tests/core/features/filterSidebar.spec.ts b/src/frontend/tests/core/features/filterSidebar.spec.ts index 9a9520b8e482..2b59c76323ac 100644 --- a/src/frontend/tests/core/features/filterSidebar.spec.ts +++ b/src/frontend/tests/core/features/filterSidebar.spec.ts @@ -130,7 +130,7 @@ test( await expect(page.getByTestId("logicSub Flow [Deprecated]")).toBeVisible(); - await expect(page.getByTestId("processingSplit Text")).toBeVisible(); + await expect(page.getByTestId("processingData Operations")).toBeVisible(); await 
page.getByTestId("icon-X").first().click(); diff --git a/src/frontend/tests/core/features/freeze.spec.ts b/src/frontend/tests/core/features/freeze.spec.ts index e35d40ef8d02..a7409db33afa 100644 --- a/src/frontend/tests/core/features/freeze.spec.ts +++ b/src/frontend/tests/core/features/freeze.spec.ts @@ -58,13 +58,13 @@ test( //fourth component await page.getByTestId("sidebar-search-input").click(); - await page.getByTestId("sidebar-search-input").fill("data to message"); - await page.waitForSelector('[data-testid="processingData to Message"]', { + await page.getByTestId("sidebar-search-input").fill("Parser"); + await page.waitForSelector('[data-testid="processingParser"]', { timeout: 1000, }); await page - .getByTestId("processingData to Message") + .getByTestId("processingParser") .dragTo(page.locator('//*[@id="react-flow-id"]'), { targetPosition: { x: 50, y: 300 }, }); @@ -133,11 +133,13 @@ test( .getByTestId("handle-splittext-shownode-chunks-right") .nth(0) .click(); - await page.getByTestId("handle-parsedata-shownode-data-left").click(); + await page + .getByTestId("handle-parsercomponent-shownode-data or dataframe-left") + .click(); //connection 4 await page - .getByTestId("handle-parsedata-shownode-message-right") + .getByTestId("handle-parsercomponent-shownode-parsed text-right") .nth(0) .click(); await page.getByTestId("handle-chatoutput-shownode-inputs-left").click();