From dbc6a9c74c538b2dd75463d126d3456e5555b351 Mon Sep 17 00:00:00 2001
From: Eric Hare
Date: Tue, 3 Jun 2025 20:08:09 -0700
Subject: [PATCH 1/8] fix: Split text should only have dataframe output

---
 .../components/processing/split_text.py      |  10 +-
 .../processing/test_split_text_component.py  | 123 +++++++++---------
 2 files changed, 64 insertions(+), 69 deletions(-)

diff --git a/src/backend/base/langflow/components/processing/split_text.py b/src/backend/base/langflow/components/processing/split_text.py
index 50e805a90b53..1359236698ea 100644
--- a/src/backend/base/langflow/components/processing/split_text.py
+++ b/src/backend/base/langflow/components/processing/split_text.py
@@ -63,8 +63,7 @@ class SplitTextComponent(Component):
     ]
 
     outputs = [
-        Output(display_name="Chunks", name="chunks", method="split_text"),
-        Output(display_name="DataFrame", name="dataframe", method="as_dataframe"),
+        Output(display_name="Chunks", name="dataframe", method="split_text"),
     ]
 
     def _docs_to_data(self, docs) -> list[Data]:
@@ -132,8 +131,5 @@ def split_text_base(self):
             msg = f"Error splitting text: {e}"
             raise TypeError(msg) from e
 
-    def split_text(self) -> list[Data]:
-        return self._docs_to_data(self.split_text_base())
-
-    def as_dataframe(self) -> DataFrame:
-        return DataFrame(self.split_text())
+    def split_text(self) -> DataFrame:
+        return DataFrame(self._docs_to_data(self.split_text_base()))
diff --git a/src/backend/tests/unit/components/processing/test_split_text_component.py b/src/backend/tests/unit/components/processing/test_split_text_component.py
index 3d3a0de8946d..8d97c3d1bf61 100644
--- a/src/backend/tests/unit/components/processing/test_split_text_component.py
+++ b/src/backend/tests/unit/components/processing/test_split_text_component.py
@@ -38,7 +38,7 @@ def file_names_mapping(self):
     def test_split_text_basic(self):
         """Test basic text splitting functionality."""
         component = SplitTextComponent()
-        test_text = "This is a test.\nIt has multiple lines.\nEach line should be a chunk."
+ test_text = "First chunk\nSecond chunk\nThird chunk" component.set_attributes( { "data_inputs": [Data(text=test_text)], @@ -52,12 +52,18 @@ def test_split_text_basic(self): } ) - results = component.split_text() - assert len(results) == 3, f"Expected 3 chunks, got {len(results)}" - assert "This is a test" in results[0].text, f"Expected 'This is a test', got '{results[0].text}'" - assert "It has multiple lines" in results[1].text, f"Expected 'It has multiple lines', got '{results[1].text}'" - assert "Each line should be a chunk" in results[2].text, ( - f"Expected 'Each line should be a chunk', got '{results[2].text}'" + data_frame = component.split_text() + assert isinstance(data_frame, DataFrame), "Expected DataFrame instance" + assert len(data_frame) == 3, f"Expected DataFrame with 3 rows, got {len(data_frame)}" + assert list(data_frame.columns) == ["text"], f"Expected columns ['text'], got {list(data_frame.columns)}" + assert "First chunk" in data_frame.iloc[0]["text"], ( + f"Expected 'First chunk', got '{data_frame.iloc[0]['text']}'" + ) + assert "Second chunk" in data_frame.iloc[1]["text"], ( + f"Expected 'Second chunk', got '{data_frame.iloc[1]['text']}'" + ) + assert "Third chunk" in data_frame.iloc[2]["text"], ( + f"Expected 'Third chunk', got '{data_frame.iloc[2]['text']}'" ) def test_split_text_with_overlap(self): @@ -76,17 +82,24 @@ def test_split_text_with_overlap(self): } ) - results = component.split_text() - assert len(results) > 1, f"Expected more than 1 chunk, got {len(results)}" - # Check that chunks contain the expected text - assert "First chunk" in results[0].text, f"Expected 'First chunk' in '{results[0].text}'" - assert "Second chunk" in results[1].text, f"Expected 'Second chunk' in '{results[1].text}'" - assert "Third chunk" in results[2].text, f"Expected 'Third chunk' in '{results[2].text}'" + data_frame = component.split_text() + assert isinstance(data_frame, DataFrame), "Expected DataFrame instance" + assert len(data_frame) == 3, f"Expected DataFrame with 3 rows, got {len(data_frame)}" + assert list(data_frame.columns) == ["text"], f"Expected columns ['text'], got {list(data_frame.columns)}" + assert "First chunk" in data_frame.iloc[0]["text"], ( + f"Expected 'First chunk', got '{data_frame.iloc[0]['text']}'" + ) + assert "Second chunk" in data_frame.iloc[1]["text"], ( + f"Expected 'Second chunk', got '{data_frame.iloc[1]['text']}'" + ) + assert "Third chunk" in data_frame.iloc[2]["text"], ( + f"Expected 'Third chunk', got '{data_frame.iloc[2]['text']}'" + ) def test_split_text_custom_separator(self): """Test text splitting with a custom separator.""" component = SplitTextComponent() - test_text = "First part|Second part|Third part" + test_text = "First chunk.|Second chunk.|Third chunk." 
component.set_attributes( { "data_inputs": [Data(text=test_text)], @@ -99,17 +112,25 @@ def test_split_text_custom_separator(self): } ) - results = component.split_text() - assert len(results) == 3, f"Expected 3 chunks, got {len(results)}" - assert "First part" in results[0].text, f"Expected 'First part', got '{results[0].text}'" - assert "Second part" in results[1].text, f"Expected 'Second part', got '{results[1].text}'" - assert "Third part" in results[2].text, f"Expected 'Third part', got '{results[2].text}'" + data_frame = component.split_text() + assert isinstance(data_frame, DataFrame), "Expected DataFrame instance" + assert len(data_frame) == 3, f"Expected DataFrame with 3 rows, got {len(data_frame)}" + assert list(data_frame.columns) == ["text"], f"Expected columns ['text'], got {list(data_frame.columns)}" + assert "First chunk" in data_frame.iloc[0]["text"], ( + f"Expected 'First chunk', got '{data_frame.iloc[0]['text']}'" + ) + assert "Second chunk" in data_frame.iloc[1]["text"], ( + f"Expected 'Second chunk', got '{data_frame.iloc[1]['text']}'" + ) + assert "Third chunk" in data_frame.iloc[2]["text"], ( + f"Expected 'Third chunk', got '{data_frame.iloc[2]['text']}'" + ) def test_split_text_with_metadata(self): """Test text splitting while preserving metadata.""" component = SplitTextComponent() test_metadata = {"source": "test.txt", "author": "test"} - test_text = "Chunk 1\nChunk 2" + test_text = "First chunk\nSecond chunk" component.set_attributes( { "data_inputs": [Data(text=test_text, data=test_metadata)], @@ -122,45 +143,23 @@ def test_split_text_with_metadata(self): } ) - results = component.split_text() - assert len(results) == 2, f"Expected 2 chunks, got {len(results)}" - for result in results: - assert result.data["source"] == test_metadata["source"], ( - f"Expected source '{test_metadata['source']}', got '{result.data.get('source')}'" - ) - assert result.data["author"] == test_metadata["author"], ( - f"Expected author '{test_metadata['author']}', got '{result.data.get('author')}'" - ) - - def test_split_text_as_dataframe(self): - """Test converting split text results to DataFrame.""" - component = SplitTextComponent() - test_text = "First chunk\nSecond chunk\nThird chunk" - component.set_attributes( - { - "data_inputs": [Data(text=test_text)], - "chunk_overlap": 0, - "chunk_size": 11, - "separator": "\n", - "session_id": "test_session", - "sender": "test_sender", - "sender_name": "test_sender_name", - } - ) - - data_frame = component.as_dataframe() + data_frame = component.split_text() assert isinstance(data_frame, DataFrame), "Expected DataFrame instance" - assert len(data_frame) == 3, f"Expected DataFrame with 3 rows, got {len(data_frame)}" - assert list(data_frame.columns) == ["text"], f"Expected columns ['text'], got {list(data_frame.columns)}" + assert len(data_frame) == 2, f"Expected DataFrame with 2 rows, got {len(data_frame)}" assert "First chunk" in data_frame.iloc[0]["text"], ( f"Expected 'First chunk', got '{data_frame.iloc[0]['text']}'" ) assert "Second chunk" in data_frame.iloc[1]["text"], ( f"Expected 'Second chunk', got '{data_frame.iloc[1]['text']}'" ) - assert "Third chunk" in data_frame.iloc[2]["text"], ( - f"Expected 'Third chunk', got '{data_frame.iloc[2]['text']}'" - ) + # Loop over each row to check metadata + for _, row in data_frame.iterrows(): + assert row["source"] == test_metadata["source"], ( + f"Expected source '{test_metadata['source']}', got '{row["source"]}'" + ) + assert row["author"] == test_metadata["author"], ( + f"Expected author 
'{test_metadata['author']}', got '{row["author"]}'" + ) def test_split_text_empty_input(self): """Test handling of empty input text.""" @@ -198,7 +197,7 @@ def test_split_text_single_chunk(self): results = component.split_text() assert len(results) == 1, f"Expected 1 chunk, got {len(results)}" - assert results[0].text == test_text, f"Expected '{test_text}', got '{results[0].text}'" + assert results["text"][0] == test_text, f"Expected '{test_text}', got '{results["text"][0]}'" def test_split_text_multiple_inputs(self): """Test splitting multiple input texts.""" @@ -218,10 +217,10 @@ def test_split_text_multiple_inputs(self): results = component.split_text() assert len(results) == 4, f"Expected 4 chunks (2 from each text), got {len(results)}" - assert "First text" in results[0].text, f"Expected 'First text', got '{results[0].text}'" - assert "Second line" in results[1].text, f"Expected 'Second line', got '{results[1].text}'" - assert "Another text" in results[2].text, f"Expected 'Another text', got '{results[2].text}'" - assert "Another line" in results[3].text, f"Expected 'Another line', got '{results[3].text}'" + assert "First text" in results["text"][0], f"Expected 'First text', got '{results["text"][0]}'" + assert "Second line" in results["text"][1], f"Expected 'Second line', got '{results["text"][1]}'" + assert "Another text" in results["text"][2], f"Expected 'Another text', got '{results["text"][2]}'" + assert "Another line" in results["text"][3], f"Expected 'Another line', got '{results["text"][3]}'" def test_split_text_with_dataframe_input(self): """Test splitting text with DataFrame input.""" @@ -242,10 +241,10 @@ def test_split_text_with_dataframe_input(self): results = component.split_text() assert len(results) == 4, f"Expected 4 chunks (2 from each text), got {len(results)}" - assert "First text" in results[0].text, f"Expected 'First text', got '{results[0].text}'" - assert "Second line" in results[1].text, f"Expected 'Second line', got '{results[1].text}'" - assert "Another text" in results[2].text, f"Expected 'Another text', got '{results[2].text}'" - assert "Another line" in results[3].text, f"Expected 'Another line', got '{results[3].text}'" + assert "First text" in results["text"][0], f"Expected 'First text', got '{results["text"][0]}'" + assert "Second line" in results["text"][1], f"Expected 'Second line', got '{results["text"][1]}'" + assert "Another text" in results["text"][2], f"Expected 'Another text', got '{results["text"][2]}'" + assert "Another line" in results["text"][3], f"Expected 'Another line', got '{results["text"][3]}'" def test_with_url_loader(self): """Test splitting text with URL loader.""" @@ -267,5 +266,5 @@ def test_with_url_loader(self): ) results = component.split_text() - assert isinstance(results, list), "Expected list instance" + assert isinstance(results, DataFrame), "Expected DataFrame instance" assert len(results) > 2, f"Expected DataFrame with more than 2 rows, got {len(results)}" From cf45d523de6438c4f3d89519703bf6ae5b63a6df Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Wed, 4 Jun 2025 03:13:56 +0000 Subject: [PATCH 2/8] [autofix.ci] apply automated fixes --- .../processing/test_split_text_component.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/backend/tests/unit/components/processing/test_split_text_component.py b/src/backend/tests/unit/components/processing/test_split_text_component.py index 8d97c3d1bf61..40e66ed62a37 
100644
--- a/src/backend/tests/unit/components/processing/test_split_text_component.py
+++ b/src/backend/tests/unit/components/processing/test_split_text_component.py
@@ -155,10 +155,10 @@ def test_split_text_with_metadata(self):
         # Loop over each row to check metadata
         for _, row in data_frame.iterrows():
             assert row["source"] == test_metadata["source"], (
-                f"Expected source '{test_metadata['source']}', got '{row["source"]}'"
+                f"Expected source '{test_metadata['source']}', got '{row['source']}'"
             )
             assert row["author"] == test_metadata["author"], (
-                f"Expected author '{test_metadata['author']}', got '{row["author"]}'"
+                f"Expected author '{test_metadata['author']}', got '{row['author']}'"
             )
 
     def test_split_text_empty_input(self):
@@ -197,7 +197,7 @@ def test_split_text_single_chunk(self):
 
         results = component.split_text()
         assert len(results) == 1, f"Expected 1 chunk, got {len(results)}"
-        assert results["text"][0] == test_text, f"Expected '{test_text}', got '{results["text"][0]}'"
+        assert results["text"][0] == test_text, f"Expected '{test_text}', got '{results['text'][0]}'"
 
     def test_split_text_multiple_inputs(self):
         """Test splitting multiple input texts."""
@@ -217,10 +217,10 @@ def test_split_text_multiple_inputs(self):
 
         results = component.split_text()
         assert len(results) == 4, f"Expected 4 chunks (2 from each text), got {len(results)}"
-        assert "First text" in results["text"][0], f"Expected 'First text', got '{results["text"][0]}'"
-        assert "Second line" in results["text"][1], f"Expected 'Second line', got '{results["text"][1]}'"
-        assert "Another text" in results["text"][2], f"Expected 'Another text', got '{results["text"][2]}'"
-        assert "Another line" in results["text"][3], f"Expected 'Another line', got '{results["text"][3]}'"
+        assert "First text" in results["text"][0], f"Expected 'First text', got '{results['text'][0]}'"
+        assert "Second line" in results["text"][1], f"Expected 'Second line', got '{results['text'][1]}'"
+        assert "Another text" in results["text"][2], f"Expected 'Another text', got '{results['text'][2]}'"
+        assert "Another line" in results["text"][3], f"Expected 'Another line', got '{results['text'][3]}'"
 
     def test_split_text_with_dataframe_input(self):
         """Test splitting text with DataFrame input."""
@@ -241,10 +241,10 @@ def test_split_text_with_dataframe_input(self):
 
         results = component.split_text()
         assert len(results) == 4, f"Expected 4 chunks (2 from each text), got {len(results)}"
-        assert "First text" in results["text"][0], f"Expected 'First text', got '{results["text"][0]}'"
-        assert "Second line" in results["text"][1], f"Expected 'Second line', got '{results["text"][1]}'"
-        assert "Another text" in results["text"][2], f"Expected 'Another text', got '{results["text"][2]}'"
-        assert "Another line" in results["text"][3], f"Expected 'Another line', got '{results["text"][3]}'"
+        assert "First text" in results["text"][0], f"Expected 'First text', got '{results['text'][0]}'"
+        assert "Second line" in results["text"][1], f"Expected 'Second line', got '{results['text'][1]}'"
+        assert "Another text" in results["text"][2], f"Expected 'Another text', got '{results['text'][2]}'"
+        assert "Another line" in results["text"][3], f"Expected 'Another line', got '{results['text'][3]}'"
 
     def test_with_url_loader(self):
         """Test splitting text with URL loader."""

From 275b07cef66bc90029165bda22c31918693120ca Mon Sep 17 00:00:00 2001
From: Eric Hare
Date: Tue, 3 Jun 2025 20:49:34 -0700
Subject: [PATCH 3/8] Update templates

---
 .../Pok\303\251dex Agent.json"               |  2 +-
.../starter_projects/Vector Store RAG.json | 22 +++---------------- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git "a/src/backend/base/langflow/initial_setup/starter_projects/Pok\303\251dex Agent.json" "b/src/backend/base/langflow/initial_setup/starter_projects/Pok\303\251dex Agent.json" index 5f06ff8e7912..69265210df53 100644 --- "a/src/backend/base/langflow/initial_setup/starter_projects/Pok\303\251dex Agent.json" +++ "b/src/backend/base/langflow/initial_setup/starter_projects/Pok\303\251dex Agent.json" @@ -922,7 +922,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import json\nimport re\nimport tempfile\nfrom datetime import datetime, timezone\nfrom pathlib import Path\nfrom typing import Any\nfrom urllib.parse import parse_qsl, urlencode, urlparse, urlunparse\n\nimport aiofiles\nimport aiofiles.os as aiofiles_os\nimport httpx\nimport validators\n\nfrom langflow.base.curl.parse import parse_context\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import TabInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n IntInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.services.deps import get_settings_service\nfrom langflow.utils.component_utils import set_current_fields, set_field_advanced, set_field_display\n\n# Define fields for each mode\nMODE_FIELDS = {\n \"URL\": [\n \"url_input\",\n \"method\",\n ],\n \"cURL\": [\"curl_input\"],\n}\n\n# Fields that should always be visible\nDEFAULT_FIELDS = [\"mode\"]\n\n\nclass APIRequestComponent(Component):\n display_name = \"API Request\"\n description = \"Make HTTP requests using URL or cURL commands.\"\n icon = \"Globe\"\n name = \"APIRequest\"\n\n inputs = [\n MessageTextInput(\n name=\"url_input\",\n display_name=\"URL\",\n info=\"Enter the URL for the request.\",\n advanced=False,\n tool_mode=True,\n ),\n MultilineInput(\n name=\"curl_input\",\n display_name=\"cURL\",\n info=(\n \"Paste a curl command to populate the fields. 
\"\n \"This will fill in the dictionary fields for headers and body.\"\n ),\n real_time_refresh=True,\n tool_mode=True,\n advanced=True,\n show=False,\n ),\n DropdownInput(\n name=\"method\",\n display_name=\"Method\",\n options=[\"GET\", \"POST\", \"PATCH\", \"PUT\", \"DELETE\"],\n value=\"GET\",\n info=\"The HTTP method to use.\",\n real_time_refresh=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"URL\", \"cURL\"],\n value=\"URL\",\n info=\"Enable cURL mode to populate fields from a cURL command.\",\n real_time_refresh=True,\n ),\n DataInput(\n name=\"query_params\",\n display_name=\"Query Parameters\",\n info=\"The query parameters to append to the URL.\",\n advanced=True,\n ),\n TableInput(\n name=\"body\",\n display_name=\"Body\",\n info=\"The body to send with the request as a dictionary (for POST, PATCH, PUT).\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Key\",\n \"type\": \"str\",\n \"description\": \"Parameter name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"description\": \"Parameter value\",\n },\n ],\n value=[],\n input_types=[\"Data\"],\n advanced=True,\n real_time_refresh=True,\n ),\n TableInput(\n name=\"headers\",\n display_name=\"Headers\",\n info=\"The headers to send with the request\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Header\",\n \"type\": \"str\",\n \"description\": \"Header name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Header value\",\n },\n ],\n value=[{\"key\": \"User-Agent\", \"value\": get_settings_service().settings.user_agent}],\n advanced=True,\n input_types=[\"Data\"],\n real_time_refresh=True,\n ),\n IntInput(\n name=\"timeout\",\n display_name=\"Timeout\",\n value=30,\n info=\"The timeout to use for the request.\",\n advanced=True,\n ),\n BoolInput(\n name=\"follow_redirects\",\n display_name=\"Follow Redirects\",\n value=True,\n info=\"Whether to follow http redirects.\",\n advanced=True,\n ),\n BoolInput(\n name=\"save_to_file\",\n display_name=\"Save to File\",\n value=False,\n info=\"Save the API response to a temporary file\",\n advanced=True,\n ),\n BoolInput(\n name=\"include_httpx_metadata\",\n display_name=\"Include HTTPx Metadata\",\n value=False,\n info=(\n \"Include properties such as headers, status_code, response_headers, \"\n \"and redirection_history in the output.\"\n ),\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"API Response\", name=\"data\", method=\"make_api_request\"),\n ]\n\n def _parse_json_value(self, value: Any) -> Any:\n \"\"\"Parse a value that might be a JSON string.\"\"\"\n if not isinstance(value, str):\n return value\n\n try:\n parsed = json.loads(value)\n except json.JSONDecodeError:\n return value\n else:\n return parsed\n\n def _process_body(self, body: Any) -> dict:\n \"\"\"Process the body input into a valid dictionary.\"\"\"\n if body is None:\n return {}\n if isinstance(body, dict):\n return self._process_dict_body(body)\n if isinstance(body, str):\n return self._process_string_body(body)\n if isinstance(body, list):\n return self._process_list_body(body)\n return {}\n\n def _process_dict_body(self, body: dict) -> dict:\n \"\"\"Process dictionary body by parsing JSON values.\"\"\"\n return {k: self._parse_json_value(v) for k, v in body.items()}\n\n def _process_string_body(self, body: str) -> dict:\n \"\"\"Process string body by attempting JSON parse.\"\"\"\n try:\n return self._process_body(json.loads(body))\n except 
json.JSONDecodeError:\n return {\"data\": body}\n\n def _process_list_body(self, body: list) -> dict:\n \"\"\"Process list body by converting to key-value dictionary.\"\"\"\n processed_dict = {}\n try:\n for item in body:\n if not self._is_valid_key_value_item(item):\n continue\n key = item[\"key\"]\n value = self._parse_json_value(item[\"value\"])\n processed_dict[key] = value\n except (KeyError, TypeError, ValueError) as e:\n self.log(f\"Failed to process body list: {e}\")\n return {}\n return processed_dict\n\n def _is_valid_key_value_item(self, item: Any) -> bool:\n \"\"\"Check if an item is a valid key-value dictionary.\"\"\"\n return isinstance(item, dict) and \"key\" in item and \"value\" in item\n\n\n def parse_curl(self, curl: str, build_config: dotdict) -> dotdict:\n \"\"\"Parse a cURL command and update build configuration.\"\"\"\n try:\n parsed = parse_context(curl)\n\n # Update basic configuration\n url = parsed.url\n # Normalize URL before setting it\n url = self._normalize_url(url)\n\n build_config[\"url_input\"][\"value\"] = url\n build_config[\"method\"][\"value\"] = parsed.method.upper()\n\n # Process headers\n headers_list = [{\"key\": k, \"value\": v} for k, v in parsed.headers.items()]\n build_config[\"headers\"][\"value\"] = headers_list\n\n # Process body data\n if not parsed.data:\n build_config[\"body\"][\"value\"] = []\n elif parsed.data:\n try:\n json_data = json.loads(parsed.data)\n if isinstance(json_data, dict):\n body_list = [\n {\"key\": k, \"value\": json.dumps(v) if isinstance(v, dict | list) else str(v)}\n for k, v in json_data.items()\n ]\n build_config[\"body\"][\"value\"] = body_list\n else:\n build_config[\"body\"][\"value\"] = [{\"key\": \"data\", \"value\": json.dumps(json_data)}]\n except json.JSONDecodeError:\n build_config[\"body\"][\"value\"] = [{\"key\": \"data\", \"value\": parsed.data}]\n\n except Exception as exc:\n msg = f\"Error parsing curl: {exc}\"\n self.log(msg)\n raise ValueError(msg) from exc\n\n return build_config\n\n def _normalize_url(self, url: str) -> str:\n \"\"\"Normalize URL by adding https:// if no protocol is specified.\"\"\"\n if not url or not isinstance(url, str):\n msg = \"URL cannot be empty\"\n raise ValueError(msg)\n\n url = url.strip()\n if url.startswith((\"http://\", \"https://\")):\n return url\n return f\"https://{url}\"\n\n async def make_request(\n self,\n client: httpx.AsyncClient,\n method: str,\n url: str,\n headers: dict | None = None,\n body: Any = None,\n timeout: int = 5,\n *,\n follow_redirects: bool = True,\n save_to_file: bool = False,\n include_httpx_metadata: bool = False,\n ) -> Data:\n method = method.upper()\n if method not in {\"GET\", \"POST\", \"PATCH\", \"PUT\", \"DELETE\"}:\n msg = f\"Unsupported method: {method}\"\n raise ValueError(msg)\n\n processed_body = self._process_body(body)\n redirection_history = []\n\n try:\n # Prepare request parameters\n request_params = {\n \"method\": method,\n \"url\": url,\n \"headers\": headers,\n \"json\": processed_body,\n \"timeout\": timeout,\n \"follow_redirects\": follow_redirects,\n }\n response = await client.request(**request_params)\n\n redirection_history = [\n {\n \"url\": redirect.headers.get(\"Location\", str(redirect.url)),\n \"status_code\": redirect.status_code,\n }\n for redirect in response.history\n ]\n\n is_binary, file_path = await self._response_info(response, with_file_path=save_to_file)\n response_headers = self._headers_to_dict(response.headers)\n\n # Base metadata\n metadata = {\n \"source\": url,\n \"status_code\": 
response.status_code,\n \"response_headers\": response_headers,\n }\n\n if redirection_history:\n metadata[\"redirection_history\"] = redirection_history\n\n if save_to_file:\n mode = \"wb\" if is_binary else \"w\"\n encoding = response.encoding if mode == \"w\" else None\n if file_path:\n await aiofiles_os.makedirs(file_path.parent, exist_ok=True)\n if is_binary:\n async with aiofiles.open(file_path, \"wb\") as f:\n await f.write(response.content)\n await f.flush()\n else:\n async with aiofiles.open(file_path, \"w\", encoding=encoding) as f:\n await f.write(response.text)\n await f.flush()\n metadata[\"file_path\"] = str(file_path)\n\n if include_httpx_metadata:\n metadata.update({\"headers\": headers})\n return Data(data=metadata)\n\n # Handle response content\n if is_binary:\n result = response.content\n else:\n try:\n result = response.json()\n except json.JSONDecodeError:\n self.log(\"Failed to decode JSON response\")\n result = response.text.encode(\"utf-8\")\n\n metadata[\"result\"] = result\n\n if include_httpx_metadata:\n metadata.update({\"headers\": headers})\n\n return Data(data=metadata)\n except (httpx.HTTPError, httpx.RequestError, httpx.TimeoutException) as exc:\n self.log(f\"Error making request to {url}\")\n return Data(\n data={\n \"source\": url,\n \"headers\": headers,\n \"status_code\": 500,\n \"error\": str(exc),\n **({\"redirection_history\": redirection_history} if redirection_history else {}),\n },\n )\n\n def add_query_params(self, url: str, params: dict) -> str:\n \"\"\"Add query parameters to URL efficiently.\"\"\"\n if not params:\n return url\n url_parts = list(urlparse(url))\n query = dict(parse_qsl(url_parts[4]))\n query.update(params)\n url_parts[4] = urlencode(query)\n return urlunparse(url_parts)\n\n def _headers_to_dict(self, headers: httpx.Headers) -> dict[str, str]:\n \"\"\"Convert HTTP headers to a dictionary with lowercased keys.\"\"\"\n return {k.lower(): v for k, v in headers.items()}\n\n def _process_headers(self, headers: Any) -> dict:\n \"\"\"Process the headers input into a valid dictionary.\"\"\"\n if headers is None:\n return {}\n if isinstance(headers, dict):\n return headers\n if isinstance(headers, list):\n return {item[\"key\"]: item[\"value\"] for item in headers if self._is_valid_key_value_item(item)}\n return {}\n\n async def make_api_request(self) -> Data:\n \"\"\"Make HTTP request with optimized parameter handling.\"\"\"\n method = self.method\n url = self.url_input.strip() if isinstance(self.url_input, str) else \"\"\n headers = self.headers or {}\n body = self.body or {}\n timeout = self.timeout\n follow_redirects = self.follow_redirects\n save_to_file = self.save_to_file\n include_httpx_metadata = self.include_httpx_metadata\n\n # if self.mode == \"cURL\" and self.curl_input:\n # self._build_config = self.parse_curl(self.curl_input, dotdict())\n # # After parsing curl, get the normalized URL\n # url = self._build_config[\"url_input\"][\"value\"]\n\n # Normalize URL before validation\n url = self._normalize_url(url)\n\n # Validate URL\n if not validators.url(url):\n msg = f\"Invalid URL provided: {url}\"\n raise ValueError(msg)\n\n # Process query parameters\n if isinstance(self.query_params, str):\n query_params = dict(parse_qsl(self.query_params))\n else:\n query_params = self.query_params.data if self.query_params else {}\n\n # Process headers and body\n headers = self._process_headers(headers)\n body = self._process_body(body)\n url = self.add_query_params(url, query_params)\n\n async with httpx.AsyncClient() as client:\n 
result = await self.make_request(\n client,\n method,\n url,\n headers,\n body,\n timeout,\n follow_redirects=follow_redirects,\n save_to_file=save_to_file,\n include_httpx_metadata=include_httpx_metadata,\n )\n self.status = result\n return result\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n \"\"\"Update the build config based on the selected mode.\"\"\"\n if field_name != \"mode\":\n if field_name == \"curl_input\" and self.mode == \"cURL\" and self.curl_input:\n return self.parse_curl(self.curl_input, build_config)\n return build_config\n\n # print(f\"Current mode: {field_value}\")\n if field_value == \"cURL\":\n set_field_display(build_config, \"curl_input\", value=True)\n if build_config[\"curl_input\"][\"value\"]:\n build_config = self.parse_curl(build_config[\"curl_input\"][\"value\"], build_config)\n else:\n set_field_display(build_config, \"curl_input\", value=False)\n\n return set_current_fields(\n build_config=build_config,\n action_fields=MODE_FIELDS,\n selected_action=field_value,\n default_fields=DEFAULT_FIELDS,\n func=set_field_advanced,\n default_value=True,\n )\n\n async def _response_info(\n self, response: httpx.Response, *, with_file_path: bool = False\n ) -> tuple[bool, Path | None]:\n \"\"\"Determine the file path and whether the response content is binary.\n\n Args:\n response (Response): The HTTP response object.\n with_file_path (bool): Whether to save the response content to a file.\n\n Returns:\n Tuple[bool, Path | None]:\n A tuple containing a boolean indicating if the content is binary and the full file path (if applicable).\n \"\"\"\n content_type = response.headers.get(\"Content-Type\", \"\")\n is_binary = \"application/octet-stream\" in content_type or \"application/binary\" in content_type\n\n if not with_file_path:\n return is_binary, None\n\n component_temp_dir = Path(tempfile.gettempdir()) / self.__class__.__name__\n\n # Create directory asynchronously\n await aiofiles_os.makedirs(component_temp_dir, exist_ok=True)\n\n filename = None\n if \"Content-Disposition\" in response.headers:\n content_disposition = response.headers[\"Content-Disposition\"]\n filename_match = re.search(r'filename=\"(.+?)\"', content_disposition)\n if filename_match:\n extracted_filename = filename_match.group(1)\n filename = extracted_filename\n\n # Step 3: Infer file extension or use part of the request URL if no filename\n if not filename:\n # Extract the last segment of the URL path\n url_path = urlparse(str(response.request.url) if response.request else \"\").path\n base_name = Path(url_path).name # Get the last segment of the path\n if not base_name: # If the path ends with a slash or is empty\n base_name = \"response\"\n\n # Infer file extension\n content_type_to_extension = {\n \"text/plain\": \".txt\",\n \"application/json\": \".json\",\n \"image/jpeg\": \".jpg\",\n \"image/png\": \".png\",\n \"application/octet-stream\": \".bin\",\n }\n extension = content_type_to_extension.get(content_type, \".bin\" if is_binary else \".txt\")\n filename = f\"{base_name}{extension}\"\n\n # Step 4: Define the full file path\n file_path = component_temp_dir / filename\n\n # Step 5: Check if file exists asynchronously and handle accordingly\n try:\n # Try to create the file exclusively (x mode) to check existence\n async with aiofiles.open(file_path, \"x\") as _:\n pass # File created successfully, we can use this path\n except FileExistsError:\n # If file exists, append a timestamp to the filename\n timestamp = 
datetime.now(timezone.utc).strftime(\"%Y%m%d%H%M%S%f\")\n file_path = component_temp_dir / f\"{timestamp}-{filename}\"\n\n return is_binary, file_path\n" + "value": "import json\nimport re\nimport tempfile\nfrom datetime import datetime, timezone\nfrom pathlib import Path\nfrom typing import Any\nfrom urllib.parse import parse_qsl, urlencode, urlparse, urlunparse\n\nimport aiofiles\nimport aiofiles.os as aiofiles_os\nimport httpx\nimport validators\n\nfrom langflow.base.curl.parse import parse_context\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import TabInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n IntInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.services.deps import get_settings_service\nfrom langflow.utils.component_utils import set_current_fields, set_field_advanced, set_field_display\n\n# Define fields for each mode\nMODE_FIELDS = {\n \"URL\": [\n \"url_input\",\n \"method\",\n ],\n \"cURL\": [\"curl_input\"],\n}\n\n# Fields that should always be visible\nDEFAULT_FIELDS = [\"mode\"]\n\n\nclass APIRequestComponent(Component):\n display_name = \"API Request\"\n description = \"Make HTTP requests using URL or cURL commands.\"\n icon = \"Globe\"\n name = \"APIRequest\"\n\n inputs = [\n MessageTextInput(\n name=\"url_input\",\n display_name=\"URL\",\n info=\"Enter the URL for the request.\",\n advanced=False,\n tool_mode=True,\n ),\n MultilineInput(\n name=\"curl_input\",\n display_name=\"cURL\",\n info=(\n \"Paste a curl command to populate the fields. \"\n \"This will fill in the dictionary fields for headers and body.\"\n ),\n real_time_refresh=True,\n tool_mode=True,\n advanced=True,\n show=False,\n ),\n DropdownInput(\n name=\"method\",\n display_name=\"Method\",\n options=[\"GET\", \"POST\", \"PATCH\", \"PUT\", \"DELETE\"],\n value=\"GET\",\n info=\"The HTTP method to use.\",\n real_time_refresh=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"URL\", \"cURL\"],\n value=\"URL\",\n info=\"Enable cURL mode to populate fields from a cURL command.\",\n real_time_refresh=True,\n ),\n DataInput(\n name=\"query_params\",\n display_name=\"Query Parameters\",\n info=\"The query parameters to append to the URL.\",\n advanced=True,\n ),\n TableInput(\n name=\"body\",\n display_name=\"Body\",\n info=\"The body to send with the request as a dictionary (for POST, PATCH, PUT).\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Key\",\n \"type\": \"str\",\n \"description\": \"Parameter name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"description\": \"Parameter value\",\n },\n ],\n value=[],\n input_types=[\"Data\"],\n advanced=True,\n real_time_refresh=True,\n ),\n TableInput(\n name=\"headers\",\n display_name=\"Headers\",\n info=\"The headers to send with the request\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Header\",\n \"type\": \"str\",\n \"description\": \"Header name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Header value\",\n },\n ],\n value=[{\"key\": \"User-Agent\", \"value\": get_settings_service().settings.user_agent}],\n advanced=True,\n input_types=[\"Data\"],\n real_time_refresh=True,\n ),\n IntInput(\n name=\"timeout\",\n display_name=\"Timeout\",\n value=30,\n info=\"The timeout to use for the request.\",\n advanced=True,\n ),\n BoolInput(\n 
name=\"follow_redirects\",\n display_name=\"Follow Redirects\",\n value=True,\n info=\"Whether to follow http redirects.\",\n advanced=True,\n ),\n BoolInput(\n name=\"save_to_file\",\n display_name=\"Save to File\",\n value=False,\n info=\"Save the API response to a temporary file\",\n advanced=True,\n ),\n BoolInput(\n name=\"include_httpx_metadata\",\n display_name=\"Include HTTPx Metadata\",\n value=False,\n info=(\n \"Include properties such as headers, status_code, response_headers, \"\n \"and redirection_history in the output.\"\n ),\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"API Response\", name=\"data\", method=\"make_api_request\"),\n ]\n\n def _parse_json_value(self, value: Any) -> Any:\n \"\"\"Parse a value that might be a JSON string.\"\"\"\n if not isinstance(value, str):\n return value\n\n try:\n parsed = json.loads(value)\n except json.JSONDecodeError:\n return value\n else:\n return parsed\n\n def _process_body(self, body: Any) -> dict:\n \"\"\"Process the body input into a valid dictionary.\"\"\"\n if body is None:\n return {}\n if isinstance(body, dict):\n return self._process_dict_body(body)\n if isinstance(body, str):\n return self._process_string_body(body)\n if isinstance(body, list):\n return self._process_list_body(body)\n return {}\n\n def _process_dict_body(self, body: dict) -> dict:\n \"\"\"Process dictionary body by parsing JSON values.\"\"\"\n return {k: self._parse_json_value(v) for k, v in body.items()}\n\n def _process_string_body(self, body: str) -> dict:\n \"\"\"Process string body by attempting JSON parse.\"\"\"\n try:\n return self._process_body(json.loads(body))\n except json.JSONDecodeError:\n return {\"data\": body}\n\n def _process_list_body(self, body: list) -> dict:\n \"\"\"Process list body by converting to key-value dictionary.\"\"\"\n processed_dict = {}\n try:\n for item in body:\n if not self._is_valid_key_value_item(item):\n continue\n key = item[\"key\"]\n value = self._parse_json_value(item[\"value\"])\n processed_dict[key] = value\n except (KeyError, TypeError, ValueError) as e:\n self.log(f\"Failed to process body list: {e}\")\n return {}\n return processed_dict\n\n def _is_valid_key_value_item(self, item: Any) -> bool:\n \"\"\"Check if an item is a valid key-value dictionary.\"\"\"\n return isinstance(item, dict) and \"key\" in item and \"value\" in item\n\n def parse_curl(self, curl: str, build_config: dotdict) -> dotdict:\n \"\"\"Parse a cURL command and update build configuration.\"\"\"\n try:\n parsed = parse_context(curl)\n\n # Update basic configuration\n url = parsed.url\n # Normalize URL before setting it\n url = self._normalize_url(url)\n\n build_config[\"url_input\"][\"value\"] = url\n build_config[\"method\"][\"value\"] = parsed.method.upper()\n\n # Process headers\n headers_list = [{\"key\": k, \"value\": v} for k, v in parsed.headers.items()]\n build_config[\"headers\"][\"value\"] = headers_list\n\n # Process body data\n if not parsed.data:\n build_config[\"body\"][\"value\"] = []\n elif parsed.data:\n try:\n json_data = json.loads(parsed.data)\n if isinstance(json_data, dict):\n body_list = [\n {\"key\": k, \"value\": json.dumps(v) if isinstance(v, dict | list) else str(v)}\n for k, v in json_data.items()\n ]\n build_config[\"body\"][\"value\"] = body_list\n else:\n build_config[\"body\"][\"value\"] = [{\"key\": \"data\", \"value\": json.dumps(json_data)}]\n except json.JSONDecodeError:\n build_config[\"body\"][\"value\"] = [{\"key\": \"data\", \"value\": parsed.data}]\n\n except Exception as exc:\n 
msg = f\"Error parsing curl: {exc}\"\n self.log(msg)\n raise ValueError(msg) from exc\n\n return build_config\n\n def _normalize_url(self, url: str) -> str:\n \"\"\"Normalize URL by adding https:// if no protocol is specified.\"\"\"\n if not url or not isinstance(url, str):\n msg = \"URL cannot be empty\"\n raise ValueError(msg)\n\n url = url.strip()\n if url.startswith((\"http://\", \"https://\")):\n return url\n return f\"https://{url}\"\n\n async def make_request(\n self,\n client: httpx.AsyncClient,\n method: str,\n url: str,\n headers: dict | None = None,\n body: Any = None,\n timeout: int = 5,\n *,\n follow_redirects: bool = True,\n save_to_file: bool = False,\n include_httpx_metadata: bool = False,\n ) -> Data:\n method = method.upper()\n if method not in {\"GET\", \"POST\", \"PATCH\", \"PUT\", \"DELETE\"}:\n msg = f\"Unsupported method: {method}\"\n raise ValueError(msg)\n\n processed_body = self._process_body(body)\n redirection_history = []\n\n try:\n # Prepare request parameters\n request_params = {\n \"method\": method,\n \"url\": url,\n \"headers\": headers,\n \"json\": processed_body,\n \"timeout\": timeout,\n \"follow_redirects\": follow_redirects,\n }\n response = await client.request(**request_params)\n\n redirection_history = [\n {\n \"url\": redirect.headers.get(\"Location\", str(redirect.url)),\n \"status_code\": redirect.status_code,\n }\n for redirect in response.history\n ]\n\n is_binary, file_path = await self._response_info(response, with_file_path=save_to_file)\n response_headers = self._headers_to_dict(response.headers)\n\n # Base metadata\n metadata = {\n \"source\": url,\n \"status_code\": response.status_code,\n \"response_headers\": response_headers,\n }\n\n if redirection_history:\n metadata[\"redirection_history\"] = redirection_history\n\n if save_to_file:\n mode = \"wb\" if is_binary else \"w\"\n encoding = response.encoding if mode == \"w\" else None\n if file_path:\n await aiofiles_os.makedirs(file_path.parent, exist_ok=True)\n if is_binary:\n async with aiofiles.open(file_path, \"wb\") as f:\n await f.write(response.content)\n await f.flush()\n else:\n async with aiofiles.open(file_path, \"w\", encoding=encoding) as f:\n await f.write(response.text)\n await f.flush()\n metadata[\"file_path\"] = str(file_path)\n\n if include_httpx_metadata:\n metadata.update({\"headers\": headers})\n return Data(data=metadata)\n\n # Handle response content\n if is_binary:\n result = response.content\n else:\n try:\n result = response.json()\n except json.JSONDecodeError:\n self.log(\"Failed to decode JSON response\")\n result = response.text.encode(\"utf-8\")\n\n metadata[\"result\"] = result\n\n if include_httpx_metadata:\n metadata.update({\"headers\": headers})\n\n return Data(data=metadata)\n except (httpx.HTTPError, httpx.RequestError, httpx.TimeoutException) as exc:\n self.log(f\"Error making request to {url}\")\n return Data(\n data={\n \"source\": url,\n \"headers\": headers,\n \"status_code\": 500,\n \"error\": str(exc),\n **({\"redirection_history\": redirection_history} if redirection_history else {}),\n },\n )\n\n def add_query_params(self, url: str, params: dict) -> str:\n \"\"\"Add query parameters to URL efficiently.\"\"\"\n if not params:\n return url\n url_parts = list(urlparse(url))\n query = dict(parse_qsl(url_parts[4]))\n query.update(params)\n url_parts[4] = urlencode(query)\n return urlunparse(url_parts)\n\n def _headers_to_dict(self, headers: httpx.Headers) -> dict[str, str]:\n \"\"\"Convert HTTP headers to a dictionary with lowercased 
keys.\"\"\"\n return {k.lower(): v for k, v in headers.items()}\n\n def _process_headers(self, headers: Any) -> dict:\n \"\"\"Process the headers input into a valid dictionary.\"\"\"\n if headers is None:\n return {}\n if isinstance(headers, dict):\n return headers\n if isinstance(headers, list):\n return {item[\"key\"]: item[\"value\"] for item in headers if self._is_valid_key_value_item(item)}\n return {}\n\n async def make_api_request(self) -> Data:\n \"\"\"Make HTTP request with optimized parameter handling.\"\"\"\n method = self.method\n url = self.url_input.strip() if isinstance(self.url_input, str) else \"\"\n headers = self.headers or {}\n body = self.body or {}\n timeout = self.timeout\n follow_redirects = self.follow_redirects\n save_to_file = self.save_to_file\n include_httpx_metadata = self.include_httpx_metadata\n\n # if self.mode == \"cURL\" and self.curl_input:\n # self._build_config = self.parse_curl(self.curl_input, dotdict())\n # # After parsing curl, get the normalized URL\n # url = self._build_config[\"url_input\"][\"value\"]\n\n # Normalize URL before validation\n url = self._normalize_url(url)\n\n # Validate URL\n if not validators.url(url):\n msg = f\"Invalid URL provided: {url}\"\n raise ValueError(msg)\n\n # Process query parameters\n if isinstance(self.query_params, str):\n query_params = dict(parse_qsl(self.query_params))\n else:\n query_params = self.query_params.data if self.query_params else {}\n\n # Process headers and body\n headers = self._process_headers(headers)\n body = self._process_body(body)\n url = self.add_query_params(url, query_params)\n\n async with httpx.AsyncClient() as client:\n result = await self.make_request(\n client,\n method,\n url,\n headers,\n body,\n timeout,\n follow_redirects=follow_redirects,\n save_to_file=save_to_file,\n include_httpx_metadata=include_httpx_metadata,\n )\n self.status = result\n return result\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n \"\"\"Update the build config based on the selected mode.\"\"\"\n if field_name != \"mode\":\n if field_name == \"curl_input\" and self.mode == \"cURL\" and self.curl_input:\n return self.parse_curl(self.curl_input, build_config)\n return build_config\n\n # print(f\"Current mode: {field_value}\")\n if field_value == \"cURL\":\n set_field_display(build_config, \"curl_input\", value=True)\n if build_config[\"curl_input\"][\"value\"]:\n build_config = self.parse_curl(build_config[\"curl_input\"][\"value\"], build_config)\n else:\n set_field_display(build_config, \"curl_input\", value=False)\n\n return set_current_fields(\n build_config=build_config,\n action_fields=MODE_FIELDS,\n selected_action=field_value,\n default_fields=DEFAULT_FIELDS,\n func=set_field_advanced,\n default_value=True,\n )\n\n async def _response_info(\n self, response: httpx.Response, *, with_file_path: bool = False\n ) -> tuple[bool, Path | None]:\n \"\"\"Determine the file path and whether the response content is binary.\n\n Args:\n response (Response): The HTTP response object.\n with_file_path (bool): Whether to save the response content to a file.\n\n Returns:\n Tuple[bool, Path | None]:\n A tuple containing a boolean indicating if the content is binary and the full file path (if applicable).\n \"\"\"\n content_type = response.headers.get(\"Content-Type\", \"\")\n is_binary = \"application/octet-stream\" in content_type or \"application/binary\" in content_type\n\n if not with_file_path:\n return is_binary, None\n\n component_temp_dir 
= Path(tempfile.gettempdir()) / self.__class__.__name__\n\n # Create directory asynchronously\n await aiofiles_os.makedirs(component_temp_dir, exist_ok=True)\n\n filename = None\n if \"Content-Disposition\" in response.headers:\n content_disposition = response.headers[\"Content-Disposition\"]\n filename_match = re.search(r'filename=\"(.+?)\"', content_disposition)\n if filename_match:\n extracted_filename = filename_match.group(1)\n filename = extracted_filename\n\n # Step 3: Infer file extension or use part of the request URL if no filename\n if not filename:\n # Extract the last segment of the URL path\n url_path = urlparse(str(response.request.url) if response.request else \"\").path\n base_name = Path(url_path).name # Get the last segment of the path\n if not base_name: # If the path ends with a slash or is empty\n base_name = \"response\"\n\n # Infer file extension\n content_type_to_extension = {\n \"text/plain\": \".txt\",\n \"application/json\": \".json\",\n \"image/jpeg\": \".jpg\",\n \"image/png\": \".png\",\n \"application/octet-stream\": \".bin\",\n }\n extension = content_type_to_extension.get(content_type, \".bin\" if is_binary else \".txt\")\n filename = f\"{base_name}{extension}\"\n\n # Step 4: Define the full file path\n file_path = component_temp_dir / filename\n\n # Step 5: Check if file exists asynchronously and handle accordingly\n try:\n # Try to create the file exclusively (x mode) to check existence\n async with aiofiles.open(file_path, \"x\") as _:\n pass # File created successfully, we can use this path\n except FileExistsError:\n # If file exists, append a timestamp to the filename\n timestamp = datetime.now(timezone.utc).strftime(\"%Y%m%d%H%M%S%f\")\n file_path = component_temp_dir / f\"{timestamp}-{filename}\"\n\n return is_binary, file_path\n" }, "curl_input": { "_input_type": "MultilineInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index a252a4d5512a..dc19be2262bc 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -152,9 +152,7 @@ "dataType": "SplitText", "id": "SplitText-aHhAi", "name": "chunks", - "output_types": [ - "Data" - ] + "output_types": [] }, "targetHandle": { "fieldName": "ingest_data", @@ -169,7 +167,7 @@ "id": "reactflow__edge-SplitText-aHhAi{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-aHhAiœ,œnameœ:œchunksœ,œoutput_typesœ:[œDataœ]}-AstraDB-lXzoG{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-lXzoGœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", "selected": false, "source": "SplitText-aHhAi", - "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-aHhAiœ, œnameœ: œchunksœ, œoutput_typesœ: [œDataœ]}", + "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-aHhAiœ, œnameœ: œchunksœ, œoutput_typesœ: []}", "target": "AstraDB-lXzoG", "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-xD6epœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" }, @@ -820,20 +818,6 @@ "display_name": "Chunks", "group_outputs": false, "method": "split_text", - "name": "chunks", - "selected": "Data", - "tool_mode": true, - "types": [ - "Data" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "DataFrame", - "group_outputs": false, - "method": "as_dataframe", "name": "dataframe", "selected": "DataFrame", "tool_mode": true, @@ -892,7 +876,7 @@ 
"show": true, "title_case": false, "type": "code", - "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema import Data, DataFrame\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data or DataFrame\",\n info=\"The data with texts to split in chunks.\",\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=(\n \"The maximum length of each chunk. Text is first split by separator, \"\n \"then chunks are merged up to this size. \"\n \"Individual splits larger than this won't be further divided.\"\n ),\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=(\n \"The character to split on. Use \\\\n for newline. \"\n \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . for sentences\"\n ),\n value=\"\\n\",\n ),\n MessageTextInput(\n name=\"text_key\",\n display_name=\"Text Key\",\n info=\"The key to use for the text column.\",\n value=\"text\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keep_separator\",\n display_name=\"Keep Separator\",\n info=\"Whether to keep the separator in the output chunks and where to place it.\",\n options=[\"False\", \"True\", \"Start\", \"End\"],\n value=\"False\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"chunks\", method=\"split_text\"),\n Output(display_name=\"DataFrame\", name=\"dataframe\", method=\"as_dataframe\"),\n ]\n\n def _docs_to_data(self, docs) -> list[Data]:\n return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n\n def _fix_separator(self, separator: str) -> str:\n \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n if separator == \"/n\":\n return \"\\n\"\n if separator == \"/t\":\n return \"\\t\"\n return separator\n\n def split_text_base(self):\n separator = self._fix_separator(self.separator)\n separator = unescape_string(separator)\n\n if isinstance(self.data_inputs, DataFrame):\n if not len(self.data_inputs):\n msg = \"DataFrame is empty\"\n raise TypeError(msg)\n\n self.data_inputs.text_key = self.text_key\n try:\n documents = self.data_inputs.to_lc_documents()\n except Exception as e:\n msg = f\"Error converting DataFrame to documents: {e}\"\n raise TypeError(msg) from e\n else:\n if not self.data_inputs:\n msg = \"No data inputs provided\"\n raise TypeError(msg)\n\n documents = []\n if isinstance(self.data_inputs, Data):\n self.data_inputs.text_key = self.text_key\n documents = [self.data_inputs.to_lc_document()]\n else:\n try:\n documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n if not documents:\n msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n raise TypeError(msg)\n except AttributeError as e:\n msg = f\"Invalid input type in collection: {e}\"\n raise TypeError(msg) from e\n try:\n # Convert string 'False'/'True' to boolean\n keep_sep = self.keep_separator\n if 
isinstance(keep_sep, str):\n if keep_sep.lower() == \"false\":\n keep_sep = False\n elif keep_sep.lower() == \"true\":\n keep_sep = True\n # 'start' and 'end' are kept as strings\n\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n keep_separator=keep_sep,\n )\n return splitter.split_documents(documents)\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n def split_text(self) -> list[Data]:\n return self._docs_to_data(self.split_text_base())\n\n def as_dataframe(self) -> DataFrame:\n return DataFrame(self.split_text())\n" + "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema import Data, DataFrame\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data or DataFrame\",\n info=\"The data with texts to split in chunks.\",\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=(\n \"The maximum length of each chunk. Text is first split by separator, \"\n \"then chunks are merged up to this size. \"\n \"Individual splits larger than this won't be further divided.\"\n ),\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=(\n \"The character to split on. Use \\\\n for newline. \"\n \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . 
for sentences\"\n ),\n value=\"\\n\",\n ),\n MessageTextInput(\n name=\"text_key\",\n display_name=\"Text Key\",\n info=\"The key to use for the text column.\",\n value=\"text\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keep_separator\",\n display_name=\"Keep Separator\",\n info=\"Whether to keep the separator in the output chunks and where to place it.\",\n options=[\"False\", \"True\", \"Start\", \"End\"],\n value=\"False\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"dataframe\", method=\"split_text\"),\n ]\n\n def _docs_to_data(self, docs) -> list[Data]:\n return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n\n def _fix_separator(self, separator: str) -> str:\n \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n if separator == \"/n\":\n return \"\\n\"\n if separator == \"/t\":\n return \"\\t\"\n return separator\n\n def split_text_base(self):\n separator = self._fix_separator(self.separator)\n separator = unescape_string(separator)\n\n if isinstance(self.data_inputs, DataFrame):\n if not len(self.data_inputs):\n msg = \"DataFrame is empty\"\n raise TypeError(msg)\n\n self.data_inputs.text_key = self.text_key\n try:\n documents = self.data_inputs.to_lc_documents()\n except Exception as e:\n msg = f\"Error converting DataFrame to documents: {e}\"\n raise TypeError(msg) from e\n else:\n if not self.data_inputs:\n msg = \"No data inputs provided\"\n raise TypeError(msg)\n\n documents = []\n if isinstance(self.data_inputs, Data):\n self.data_inputs.text_key = self.text_key\n documents = [self.data_inputs.to_lc_document()]\n else:\n try:\n documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n if not documents:\n msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n raise TypeError(msg)\n except AttributeError as e:\n msg = f\"Invalid input type in collection: {e}\"\n raise TypeError(msg) from e\n try:\n # Convert string 'False'/'True' to boolean\n keep_sep = self.keep_separator\n if isinstance(keep_sep, str):\n if keep_sep.lower() == \"false\":\n keep_sep = False\n elif keep_sep.lower() == \"true\":\n keep_sep = True\n # 'start' and 'end' are kept as strings\n\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n keep_separator=keep_sep,\n )\n return splitter.split_documents(documents)\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n def split_text(self) -> DataFrame:\n return DataFrame(self._docs_to_data(self.split_text_base()))\n" }, "data_inputs": { "advanced": false, From 13b633557aa894453c9a89746f9cf15b7ced5f99 Mon Sep 17 00:00:00 2001 From: Mike Fortman Date: Thu, 5 Jun 2025 11:51:47 -0500 Subject: [PATCH 4/8] test fix --- src/frontend/tests/core/features/filterSidebar.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/frontend/tests/core/features/filterSidebar.spec.ts b/src/frontend/tests/core/features/filterSidebar.spec.ts index fa58bd6e129e..fed8bd540628 100644 --- a/src/frontend/tests/core/features/filterSidebar.spec.ts +++ b/src/frontend/tests/core/features/filterSidebar.spec.ts @@ -133,7 +133,7 @@ test( await expect(page.getByTestId("logicSub Flow [Deprecated]")).toBeVisible(); - await expect(page.getByTestId("processingSplit Text")).toBeVisible(); + await expect(page.getByTestId("processingData Operations")).toBeVisible(); await page.getByTestId("icon-X").first().click(); From 
580d119f5c4ce92ca6dda0dc7afe8bf887892357 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 9 Jun 2025 13:15:42 -0700 Subject: [PATCH 5/8] Update Vector Store RAG.json --- .../initial_setup/starter_projects/Vector Store RAG.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index fcecdf770182..13a06f8ee986 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -876,7 +876,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema import Data, DataFrame\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data or DataFrame\",\n info=\"The data with texts to split in chunks.\",\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=(\n \"The maximum length of each chunk. Text is first split by separator, \"\n \"then chunks are merged up to this size. \"\n \"Individual splits larger than this won't be further divided.\"\n ),\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=(\n \"The character to split on. Use \\\\n for newline. \"\n \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . 
for sentences\"\n ),\n value=\"\\n\",\n ),\n MessageTextInput(\n name=\"text_key\",\n display_name=\"Text Key\",\n info=\"The key to use for the text column.\",\n value=\"text\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keep_separator\",\n display_name=\"Keep Separator\",\n info=\"Whether to keep the separator in the output chunks and where to place it.\",\n options=[\"False\", \"True\", \"Start\", \"End\"],\n value=\"False\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"dataframe\", method=\"split_text\"),\n ]\n\n def _docs_to_data(self, docs) -> list[Data]:\n return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n\n def _fix_separator(self, separator: str) -> str:\n \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n if separator == \"/n\":\n return \"\\n\"\n if separator == \"/t\":\n return \"\\t\"\n return separator\n\n def split_text_base(self):\n separator = self._fix_separator(self.separator)\n separator = unescape_string(separator)\n\n if isinstance(self.data_inputs, DataFrame):\n if not len(self.data_inputs):\n msg = \"DataFrame is empty\"\n raise TypeError(msg)\n\n self.data_inputs.text_key = self.text_key\n try:\n documents = self.data_inputs.to_lc_documents()\n except Exception as e:\n msg = f\"Error converting DataFrame to documents: {e}\"\n raise TypeError(msg) from e\n else:\n if not self.data_inputs:\n msg = \"No data inputs provided\"\n raise TypeError(msg)\n\n documents = []\n if isinstance(self.data_inputs, Data):\n self.data_inputs.text_key = self.text_key\n documents = [self.data_inputs.to_lc_document()]\n else:\n try:\n documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n if not documents:\n msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n raise TypeError(msg)\n except AttributeError as e:\n msg = f\"Invalid input type in collection: {e}\"\n raise TypeError(msg) from e\n try:\n # Convert string 'False'/'True' to boolean\n keep_sep = self.keep_separator\n if isinstance(keep_sep, str):\n if keep_sep.lower() == \"false\":\n keep_sep = False\n elif keep_sep.lower() == \"true\":\n keep_sep = True\n # 'start' and 'end' are kept as strings\n\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n keep_separator=keep_sep,\n )\n return splitter.split_documents(documents)\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n def split_text(self) -> DataFrame:\n return DataFrame(self._docs_to_data(self.split_text_base()))\n + "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema import Data, DataFrame\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data or DataFrame\",\n info=\"The data with texts to split in chunks.\",\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n 
display_name=\"Chunk Size\",\n info=(\n \"The maximum length of each chunk. Text is first split by separator, \"\n \"then chunks are merged up to this size. \"\n \"Individual splits larger than this won't be further divided.\"\n ),\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=(\n \"The character to split on. Use \\\\n for newline. \"\n \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . for sentences\"\n ),\n value=\"\\n\",\n ),\n MessageTextInput(\n name=\"text_key\",\n display_name=\"Text Key\",\n info=\"The key to use for the text column.\",\n value=\"text\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keep_separator\",\n display_name=\"Keep Separator\",\n info=\"Whether to keep the separator in the output chunks and where to place it.\",\n options=[\"False\", \"True\", \"Start\", \"End\"],\n value=\"False\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"dataframe\", method=\"split_text\"),\n ]\n\n def _docs_to_data(self, docs) -> list[Data]:\n return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n\n def _fix_separator(self, separator: str) -> str:\n \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n if separator == \"/n\":\n return \"\\n\"\n if separator == \"/t\":\n return \"\\t\"\n return separator\n\n def split_text_base(self):\n separator = self._fix_separator(self.separator)\n separator = unescape_string(separator)\n\n if isinstance(self.data_inputs, DataFrame):\n if not len(self.data_inputs):\n msg = \"DataFrame is empty\"\n raise TypeError(msg)\n\n self.data_inputs.text_key = self.text_key\n try:\n documents = self.data_inputs.to_lc_documents()\n except Exception as e:\n msg = f\"Error converting DataFrame to documents: {e}\"\n raise TypeError(msg) from e\n else:\n if not self.data_inputs:\n msg = \"No data inputs provided\"\n raise TypeError(msg)\n\n documents = []\n if isinstance(self.data_inputs, Data):\n self.data_inputs.text_key = self.text_key\n documents = [self.data_inputs.to_lc_document()]\n else:\n try:\n documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n if not documents:\n msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n raise TypeError(msg)\n except AttributeError as e:\n msg = f\"Invalid input type in collection: {e}\"\n raise TypeError(msg) from e\n try:\n # Convert string 'False'/'True' to boolean\n keep_sep = self.keep_separator\n if isinstance(keep_sep, str):\n if keep_sep.lower() == \"false\":\n keep_sep = False\n elif keep_sep.lower() == \"true\":\n keep_sep = True\n # 'start' and 'end' are kept as strings\n\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n keep_separator=keep_sep,\n )\n return splitter.split_documents(documents)\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n def split_text(self) -> DataFrame:\n return DataFrame(self._docs_to_data(self.split_text_base()))\n" }, "data_inputs": { "advanced": false, From e99e296eff2e78d96bdd465fc51a77e894f157d3 Mon Sep 17 00:00:00 2001 From: Eric Hare Date: Mon, 9 Jun 2025 13:32:51 -0700 Subject: [PATCH 6/8] Update starter projects --- .../initial_setup/starter_projects/Pok\303\251dex Agent.json" | 2 +- .../initial_setup/starter_projects/Vector Store RAG.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
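
[PATCH 5/8] above re-syncs the starter project because each flow JSON embeds the full component source in its node template, so every component edit has to be mirrored into those files by hand. A hypothetical consistency check, assuming the data -> node -> template -> code -> value layout visible in these diffs (the key path and the script itself are assumptions, not tooling that exists in the repository):

import json
from pathlib import Path

component_src = Path(
    "src/backend/base/langflow/components/processing/split_text.py"
).read_text()
flow = json.loads(
    Path(
        "src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json"
    ).read_text()
)

for node in flow["data"]["nodes"]:
    data = node.get("data", {})
    if data.get("type") == "SplitText":
        # The embedded copy must match the component source byte for byte.
        assert data["node"]["template"]["code"]["value"] == component_src
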
"a/src/backend/base/langflow/initial_setup/starter_projects/Pok\303\251dex Agent.json" "b/src/backend/base/langflow/initial_setup/starter_projects/Pok\303\251dex Agent.json" index e8d769e4dedb..877928cc2057 100644 --- "a/src/backend/base/langflow/initial_setup/starter_projects/Pok\303\251dex Agent.json" +++ "b/src/backend/base/langflow/initial_setup/starter_projects/Pok\303\251dex Agent.json" @@ -922,7 +922,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import json\nimport re\nimport tempfile\nfrom datetime import datetime, timezone\nfrom pathlib import Path\nfrom typing import Any\nfrom urllib.parse import parse_qsl, urlencode, urlparse, urlunparse\n\nimport aiofiles\nimport aiofiles.os as aiofiles_os\nimport httpx\nimport validators\n\nfrom langflow.base.curl.parse import parse_context\nfrom langflow.custom import Component\nfrom langflow.inputs.inputs import TabInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n IntInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.services.deps import get_settings_service\nfrom langflow.utils.component_utils import set_current_fields, set_field_advanced, set_field_display\n\n# Define fields for each mode\nMODE_FIELDS = {\n \"URL\": [\n \"url_input\",\n \"method\",\n ],\n \"cURL\": [\"curl_input\"],\n}\n\n# Fields that should always be visible\nDEFAULT_FIELDS = [\"mode\"]\n\n\nclass APIRequestComponent(Component):\n display_name = \"API Request\"\n description = \"Make HTTP requests using URL or cURL commands.\"\n icon = \"Globe\"\n name = \"APIRequest\"\n\n inputs = [\n MessageTextInput(\n name=\"url_input\",\n display_name=\"URL\",\n info=\"Enter the URL for the request.\",\n advanced=False,\n tool_mode=True,\n ),\n MultilineInput(\n name=\"curl_input\",\n display_name=\"cURL\",\n info=(\n \"Paste a curl command to populate the fields. 
\"\n \"This will fill in the dictionary fields for headers and body.\"\n ),\n real_time_refresh=True,\n tool_mode=True,\n advanced=True,\n show=False,\n ),\n DropdownInput(\n name=\"method\",\n display_name=\"Method\",\n options=[\"GET\", \"POST\", \"PATCH\", \"PUT\", \"DELETE\"],\n value=\"GET\",\n info=\"The HTTP method to use.\",\n real_time_refresh=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"URL\", \"cURL\"],\n value=\"URL\",\n info=\"Enable cURL mode to populate fields from a cURL command.\",\n real_time_refresh=True,\n ),\n DataInput(\n name=\"query_params\",\n display_name=\"Query Parameters\",\n info=\"The query parameters to append to the URL.\",\n advanced=True,\n ),\n TableInput(\n name=\"body\",\n display_name=\"Body\",\n info=\"The body to send with the request as a dictionary (for POST, PATCH, PUT).\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Key\",\n \"type\": \"str\",\n \"description\": \"Parameter name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"description\": \"Parameter value\",\n },\n ],\n value=[],\n input_types=[\"Data\"],\n advanced=True,\n real_time_refresh=True,\n ),\n TableInput(\n name=\"headers\",\n display_name=\"Headers\",\n info=\"The headers to send with the request\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Header\",\n \"type\": \"str\",\n \"description\": \"Header name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Header value\",\n },\n ],\n value=[{\"key\": \"User-Agent\", \"value\": get_settings_service().settings.user_agent}],\n advanced=True,\n input_types=[\"Data\"],\n real_time_refresh=True,\n ),\n IntInput(\n name=\"timeout\",\n display_name=\"Timeout\",\n value=30,\n info=\"The timeout to use for the request.\",\n advanced=True,\n ),\n BoolInput(\n name=\"follow_redirects\",\n display_name=\"Follow Redirects\",\n value=True,\n info=\"Whether to follow http redirects.\",\n advanced=True,\n ),\n BoolInput(\n name=\"save_to_file\",\n display_name=\"Save to File\",\n value=False,\n info=\"Save the API response to a temporary file\",\n advanced=True,\n ),\n BoolInput(\n name=\"include_httpx_metadata\",\n display_name=\"Include HTTPx Metadata\",\n value=False,\n info=(\n \"Include properties such as headers, status_code, response_headers, \"\n \"and redirection_history in the output.\"\n ),\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"API Response\", name=\"data\", method=\"make_api_request\"),\n ]\n\n def _parse_json_value(self, value: Any) -> Any:\n \"\"\"Parse a value that might be a JSON string.\"\"\"\n if not isinstance(value, str):\n return value\n\n try:\n parsed = json.loads(value)\n except json.JSONDecodeError:\n return value\n else:\n return parsed\n\n def _process_body(self, body: Any) -> dict:\n \"\"\"Process the body input into a valid dictionary.\"\"\"\n if body is None:\n return {}\n if isinstance(body, dict):\n return self._process_dict_body(body)\n if isinstance(body, str):\n return self._process_string_body(body)\n if isinstance(body, list):\n return self._process_list_body(body)\n return {}\n\n def _process_dict_body(self, body: dict) -> dict:\n \"\"\"Process dictionary body by parsing JSON values.\"\"\"\n return {k: self._parse_json_value(v) for k, v in body.items()}\n\n def _process_string_body(self, body: str) -> dict:\n \"\"\"Process string body by attempting JSON parse.\"\"\"\n try:\n return self._process_body(json.loads(body))\n except 
json.JSONDecodeError:\n return {\"data\": body}\n\n def _process_list_body(self, body: list) -> dict:\n \"\"\"Process list body by converting to key-value dictionary.\"\"\"\n processed_dict = {}\n try:\n for item in body:\n if not self._is_valid_key_value_item(item):\n continue\n key = item[\"key\"]\n value = self._parse_json_value(item[\"value\"])\n processed_dict[key] = value\n except (KeyError, TypeError, ValueError) as e:\n self.log(f\"Failed to process body list: {e}\")\n return {}\n return processed_dict\n\n def _is_valid_key_value_item(self, item: Any) -> bool:\n \"\"\"Check if an item is a valid key-value dictionary.\"\"\"\n return isinstance(item, dict) and \"key\" in item and \"value\" in item\n\n def parse_curl(self, curl: str, build_config: dotdict) -> dotdict:\n \"\"\"Parse a cURL command and update build configuration.\"\"\"\n try:\n parsed = parse_context(curl)\n\n # Update basic configuration\n url = parsed.url\n # Normalize URL before setting it\n url = self._normalize_url(url)\n\n build_config[\"url_input\"][\"value\"] = url\n build_config[\"method\"][\"value\"] = parsed.method.upper()\n\n # Process headers\n headers_list = [{\"key\": k, \"value\": v} for k, v in parsed.headers.items()]\n build_config[\"headers\"][\"value\"] = headers_list\n\n # Process body data\n if not parsed.data:\n build_config[\"body\"][\"value\"] = []\n elif parsed.data:\n try:\n json_data = json.loads(parsed.data)\n if isinstance(json_data, dict):\n body_list = [\n {\"key\": k, \"value\": json.dumps(v) if isinstance(v, dict | list) else str(v)}\n for k, v in json_data.items()\n ]\n build_config[\"body\"][\"value\"] = body_list\n else:\n build_config[\"body\"][\"value\"] = [{\"key\": \"data\", \"value\": json.dumps(json_data)}]\n except json.JSONDecodeError:\n build_config[\"body\"][\"value\"] = [{\"key\": \"data\", \"value\": parsed.data}]\n\n except Exception as exc:\n msg = f\"Error parsing curl: {exc}\"\n self.log(msg)\n raise ValueError(msg) from exc\n\n return build_config\n\n def _normalize_url(self, url: str) -> str:\n \"\"\"Normalize URL by adding https:// if no protocol is specified.\"\"\"\n if not url or not isinstance(url, str):\n msg = \"URL cannot be empty\"\n raise ValueError(msg)\n\n url = url.strip()\n if url.startswith((\"http://\", \"https://\")):\n return url\n return f\"https://{url}\"\n\n async def make_request(\n self,\n client: httpx.AsyncClient,\n method: str,\n url: str,\n headers: dict | None = None,\n body: Any = None,\n timeout: int = 5,\n *,\n follow_redirects: bool = True,\n save_to_file: bool = False,\n include_httpx_metadata: bool = False,\n ) -> Data:\n method = method.upper()\n if method not in {\"GET\", \"POST\", \"PATCH\", \"PUT\", \"DELETE\"}:\n msg = f\"Unsupported method: {method}\"\n raise ValueError(msg)\n\n processed_body = self._process_body(body)\n redirection_history = []\n\n try:\n # Prepare request parameters\n request_params = {\n \"method\": method,\n \"url\": url,\n \"headers\": headers,\n \"json\": processed_body,\n \"timeout\": timeout,\n \"follow_redirects\": follow_redirects,\n }\n response = await client.request(**request_params)\n\n redirection_history = [\n {\n \"url\": redirect.headers.get(\"Location\", str(redirect.url)),\n \"status_code\": redirect.status_code,\n }\n for redirect in response.history\n ]\n\n is_binary, file_path = await self._response_info(response, with_file_path=save_to_file)\n response_headers = self._headers_to_dict(response.headers)\n\n # Base metadata\n metadata = {\n \"source\": url,\n \"status_code\": 
response.status_code,\n \"response_headers\": response_headers,\n }\n\n if redirection_history:\n metadata[\"redirection_history\"] = redirection_history\n\n if save_to_file:\n mode = \"wb\" if is_binary else \"w\"\n encoding = response.encoding if mode == \"w\" else None\n if file_path:\n await aiofiles_os.makedirs(file_path.parent, exist_ok=True)\n if is_binary:\n async with aiofiles.open(file_path, \"wb\") as f:\n await f.write(response.content)\n await f.flush()\n else:\n async with aiofiles.open(file_path, \"w\", encoding=encoding) as f:\n await f.write(response.text)\n await f.flush()\n metadata[\"file_path\"] = str(file_path)\n\n if include_httpx_metadata:\n metadata.update({\"headers\": headers})\n return Data(data=metadata)\n\n # Handle response content\n if is_binary:\n result = response.content\n else:\n try:\n result = response.json()\n except json.JSONDecodeError:\n self.log(\"Failed to decode JSON response\")\n result = response.text.encode(\"utf-8\")\n\n metadata[\"result\"] = result\n\n if include_httpx_metadata:\n metadata.update({\"headers\": headers})\n\n return Data(data=metadata)\n except (httpx.HTTPError, httpx.RequestError, httpx.TimeoutException) as exc:\n self.log(f\"Error making request to {url}\")\n return Data(\n data={\n \"source\": url,\n \"headers\": headers,\n \"status_code\": 500,\n \"error\": str(exc),\n **({\"redirection_history\": redirection_history} if redirection_history else {}),\n },\n )\n\n def add_query_params(self, url: str, params: dict) -> str:\n \"\"\"Add query parameters to URL efficiently.\"\"\"\n if not params:\n return url\n url_parts = list(urlparse(url))\n query = dict(parse_qsl(url_parts[4]))\n query.update(params)\n url_parts[4] = urlencode(query)\n return urlunparse(url_parts)\n\n def _headers_to_dict(self, headers: httpx.Headers) -> dict[str, str]:\n \"\"\"Convert HTTP headers to a dictionary with lowercased keys.\"\"\"\n return {k.lower(): v for k, v in headers.items()}\n\n def _process_headers(self, headers: Any) -> dict:\n \"\"\"Process the headers input into a valid dictionary.\"\"\"\n if headers is None:\n return {}\n if isinstance(headers, dict):\n return headers\n if isinstance(headers, list):\n return {item[\"key\"]: item[\"value\"] for item in headers if self._is_valid_key_value_item(item)}\n return {}\n\n async def make_api_request(self) -> Data:\n \"\"\"Make HTTP request with optimized parameter handling.\"\"\"\n method = self.method\n url = self.url_input.strip() if isinstance(self.url_input, str) else \"\"\n headers = self.headers or {}\n body = self.body or {}\n timeout = self.timeout\n follow_redirects = self.follow_redirects\n save_to_file = self.save_to_file\n include_httpx_metadata = self.include_httpx_metadata\n\n # if self.mode == \"cURL\" and self.curl_input:\n # self._build_config = self.parse_curl(self.curl_input, dotdict())\n # # After parsing curl, get the normalized URL\n # url = self._build_config[\"url_input\"][\"value\"]\n\n # Normalize URL before validation\n url = self._normalize_url(url)\n\n # Validate URL\n if not validators.url(url):\n msg = f\"Invalid URL provided: {url}\"\n raise ValueError(msg)\n\n # Process query parameters\n if isinstance(self.query_params, str):\n query_params = dict(parse_qsl(self.query_params))\n else:\n query_params = self.query_params.data if self.query_params else {}\n\n # Process headers and body\n headers = self._process_headers(headers)\n body = self._process_body(body)\n url = self.add_query_params(url, query_params)\n\n async with httpx.AsyncClient() as client:\n 
result = await self.make_request(\n client,\n method,\n url,\n headers,\n body,\n timeout,\n follow_redirects=follow_redirects,\n save_to_file=save_to_file,\n include_httpx_metadata=include_httpx_metadata,\n )\n self.status = result\n return result\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n \"\"\"Update the build config based on the selected mode.\"\"\"\n if field_name != \"mode\":\n if field_name == \"curl_input\" and self.mode == \"cURL\" and self.curl_input:\n return self.parse_curl(self.curl_input, build_config)\n return build_config\n\n # print(f\"Current mode: {field_value}\")\n if field_value == \"cURL\":\n set_field_display(build_config, \"curl_input\", value=True)\n if build_config[\"curl_input\"][\"value\"]:\n build_config = self.parse_curl(build_config[\"curl_input\"][\"value\"], build_config)\n else:\n set_field_display(build_config, \"curl_input\", value=False)\n\n return set_current_fields(\n build_config=build_config,\n action_fields=MODE_FIELDS,\n selected_action=field_value,\n default_fields=DEFAULT_FIELDS,\n func=set_field_advanced,\n default_value=True,\n )\n\n async def _response_info(\n self, response: httpx.Response, *, with_file_path: bool = False\n ) -> tuple[bool, Path | None]:\n \"\"\"Determine the file path and whether the response content is binary.\n\n Args:\n response (Response): The HTTP response object.\n with_file_path (bool): Whether to save the response content to a file.\n\n Returns:\n Tuple[bool, Path | None]:\n A tuple containing a boolean indicating if the content is binary and the full file path (if applicable).\n \"\"\"\n content_type = response.headers.get(\"Content-Type\", \"\")\n is_binary = \"application/octet-stream\" in content_type or \"application/binary\" in content_type\n\n if not with_file_path:\n return is_binary, None\n\n component_temp_dir = Path(tempfile.gettempdir()) / self.__class__.__name__\n\n # Create directory asynchronously\n await aiofiles_os.makedirs(component_temp_dir, exist_ok=True)\n\n filename = None\n if \"Content-Disposition\" in response.headers:\n content_disposition = response.headers[\"Content-Disposition\"]\n filename_match = re.search(r'filename=\"(.+?)\"', content_disposition)\n if filename_match:\n extracted_filename = filename_match.group(1)\n filename = extracted_filename\n\n # Step 3: Infer file extension or use part of the request URL if no filename\n if not filename:\n # Extract the last segment of the URL path\n url_path = urlparse(str(response.request.url) if response.request else \"\").path\n base_name = Path(url_path).name # Get the last segment of the path\n if not base_name: # If the path ends with a slash or is empty\n base_name = \"response\"\n\n # Infer file extension\n content_type_to_extension = {\n \"text/plain\": \".txt\",\n \"application/json\": \".json\",\n \"image/jpeg\": \".jpg\",\n \"image/png\": \".png\",\n \"application/octet-stream\": \".bin\",\n }\n extension = content_type_to_extension.get(content_type, \".bin\" if is_binary else \".txt\")\n filename = f\"{base_name}{extension}\"\n\n # Step 4: Define the full file path\n file_path = component_temp_dir / filename\n\n # Step 5: Check if file exists asynchronously and handle accordingly\n try:\n # Try to create the file exclusively (x mode) to check existence\n async with aiofiles.open(file_path, \"x\") as _:\n pass # File created successfully, we can use this path\n except FileExistsError:\n # If file exists, append a timestamp to the filename\n timestamp = 
datetime.now(timezone.utc).strftime(\"%Y%m%d%H%M%S%f\")\n file_path = component_temp_dir / f\"{timestamp}-{filename}\"\n\n return is_binary, file_path\n" + "value": "import json\nimport re\nimport tempfile\nfrom datetime import datetime, timezone\nfrom pathlib import Path\nfrom typing import Any\nfrom urllib.parse import parse_qsl, urlencode, urlparse, urlunparse\n\nimport aiofiles\nimport aiofiles.os as aiofiles_os\nimport httpx\nimport validators\n\nfrom langflow.base.curl.parse import parse_context\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.inputs.inputs import TabInput\nfrom langflow.io import (\n BoolInput,\n DataInput,\n DropdownInput,\n IntInput,\n MessageTextInput,\n MultilineInput,\n Output,\n TableInput,\n)\nfrom langflow.schema.data import Data\nfrom langflow.schema.dotdict import dotdict\nfrom langflow.services.deps import get_settings_service\nfrom langflow.utils.component_utils import set_current_fields, set_field_advanced, set_field_display\n\n# Define fields for each mode\nMODE_FIELDS = {\n \"URL\": [\n \"url_input\",\n \"method\",\n ],\n \"cURL\": [\"curl_input\"],\n}\n\n# Fields that should always be visible\nDEFAULT_FIELDS = [\"mode\"]\n\n\nclass APIRequestComponent(Component):\n display_name = \"API Request\"\n description = \"Make HTTP requests using URL or cURL commands.\"\n icon = \"Globe\"\n name = \"APIRequest\"\n\n inputs = [\n MessageTextInput(\n name=\"url_input\",\n display_name=\"URL\",\n info=\"Enter the URL for the request.\",\n advanced=False,\n tool_mode=True,\n ),\n MultilineInput(\n name=\"curl_input\",\n display_name=\"cURL\",\n info=(\n \"Paste a curl command to populate the fields. \"\n \"This will fill in the dictionary fields for headers and body.\"\n ),\n real_time_refresh=True,\n tool_mode=True,\n advanced=True,\n show=False,\n ),\n DropdownInput(\n name=\"method\",\n display_name=\"Method\",\n options=[\"GET\", \"POST\", \"PATCH\", \"PUT\", \"DELETE\"],\n value=\"GET\",\n info=\"The HTTP method to use.\",\n real_time_refresh=True,\n ),\n TabInput(\n name=\"mode\",\n display_name=\"Mode\",\n options=[\"URL\", \"cURL\"],\n value=\"URL\",\n info=\"Enable cURL mode to populate fields from a cURL command.\",\n real_time_refresh=True,\n ),\n DataInput(\n name=\"query_params\",\n display_name=\"Query Parameters\",\n info=\"The query parameters to append to the URL.\",\n advanced=True,\n ),\n TableInput(\n name=\"body\",\n display_name=\"Body\",\n info=\"The body to send with the request as a dictionary (for POST, PATCH, PUT).\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Key\",\n \"type\": \"str\",\n \"description\": \"Parameter name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"description\": \"Parameter value\",\n },\n ],\n value=[],\n input_types=[\"Data\"],\n advanced=True,\n real_time_refresh=True,\n ),\n TableInput(\n name=\"headers\",\n display_name=\"Headers\",\n info=\"The headers to send with the request\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Header\",\n \"type\": \"str\",\n \"description\": \"Header name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Header value\",\n },\n ],\n value=[{\"key\": \"User-Agent\", \"value\": get_settings_service().settings.user_agent}],\n advanced=True,\n input_types=[\"Data\"],\n real_time_refresh=True,\n ),\n IntInput(\n name=\"timeout\",\n display_name=\"Timeout\",\n value=30,\n info=\"The timeout to use for the request.\",\n 
advanced=True,\n ),\n BoolInput(\n name=\"follow_redirects\",\n display_name=\"Follow Redirects\",\n value=True,\n info=\"Whether to follow http redirects.\",\n advanced=True,\n ),\n BoolInput(\n name=\"save_to_file\",\n display_name=\"Save to File\",\n value=False,\n info=\"Save the API response to a temporary file\",\n advanced=True,\n ),\n BoolInput(\n name=\"include_httpx_metadata\",\n display_name=\"Include HTTPx Metadata\",\n value=False,\n info=(\n \"Include properties such as headers, status_code, response_headers, \"\n \"and redirection_history in the output.\"\n ),\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"API Response\", name=\"data\", method=\"make_api_request\"),\n ]\n\n def _parse_json_value(self, value: Any) -> Any:\n \"\"\"Parse a value that might be a JSON string.\"\"\"\n if not isinstance(value, str):\n return value\n\n try:\n parsed = json.loads(value)\n except json.JSONDecodeError:\n return value\n else:\n return parsed\n\n def _process_body(self, body: Any) -> dict:\n \"\"\"Process the body input into a valid dictionary.\"\"\"\n if body is None:\n return {}\n if isinstance(body, dict):\n return self._process_dict_body(body)\n if isinstance(body, str):\n return self._process_string_body(body)\n if isinstance(body, list):\n return self._process_list_body(body)\n return {}\n\n def _process_dict_body(self, body: dict) -> dict:\n \"\"\"Process dictionary body by parsing JSON values.\"\"\"\n return {k: self._parse_json_value(v) for k, v in body.items()}\n\n def _process_string_body(self, body: str) -> dict:\n \"\"\"Process string body by attempting JSON parse.\"\"\"\n try:\n return self._process_body(json.loads(body))\n except json.JSONDecodeError:\n return {\"data\": body}\n\n def _process_list_body(self, body: list) -> dict:\n \"\"\"Process list body by converting to key-value dictionary.\"\"\"\n processed_dict = {}\n try:\n for item in body:\n if not self._is_valid_key_value_item(item):\n continue\n key = item[\"key\"]\n value = self._parse_json_value(item[\"value\"])\n processed_dict[key] = value\n except (KeyError, TypeError, ValueError) as e:\n self.log(f\"Failed to process body list: {e}\")\n return {}\n return processed_dict\n\n def _is_valid_key_value_item(self, item: Any) -> bool:\n \"\"\"Check if an item is a valid key-value dictionary.\"\"\"\n return isinstance(item, dict) and \"key\" in item and \"value\" in item\n\n def parse_curl(self, curl: str, build_config: dotdict) -> dotdict:\n \"\"\"Parse a cURL command and update build configuration.\"\"\"\n try:\n parsed = parse_context(curl)\n\n # Update basic configuration\n url = parsed.url\n # Normalize URL before setting it\n url = self._normalize_url(url)\n\n build_config[\"url_input\"][\"value\"] = url\n build_config[\"method\"][\"value\"] = parsed.method.upper()\n\n # Process headers\n headers_list = [{\"key\": k, \"value\": v} for k, v in parsed.headers.items()]\n build_config[\"headers\"][\"value\"] = headers_list\n\n # Process body data\n if not parsed.data:\n build_config[\"body\"][\"value\"] = []\n elif parsed.data:\n try:\n json_data = json.loads(parsed.data)\n if isinstance(json_data, dict):\n body_list = [\n {\"key\": k, \"value\": json.dumps(v) if isinstance(v, dict | list) else str(v)}\n for k, v in json_data.items()\n ]\n build_config[\"body\"][\"value\"] = body_list\n else:\n build_config[\"body\"][\"value\"] = [{\"key\": \"data\", \"value\": json.dumps(json_data)}]\n except json.JSONDecodeError:\n build_config[\"body\"][\"value\"] = [{\"key\": \"data\", \"value\": 
parsed.data}]\n\n except Exception as exc:\n msg = f\"Error parsing curl: {exc}\"\n self.log(msg)\n raise ValueError(msg) from exc\n\n return build_config\n\n def _normalize_url(self, url: str) -> str:\n \"\"\"Normalize URL by adding https:// if no protocol is specified.\"\"\"\n if not url or not isinstance(url, str):\n msg = \"URL cannot be empty\"\n raise ValueError(msg)\n\n url = url.strip()\n if url.startswith((\"http://\", \"https://\")):\n return url\n return f\"https://{url}\"\n\n async def make_request(\n self,\n client: httpx.AsyncClient,\n method: str,\n url: str,\n headers: dict | None = None,\n body: Any = None,\n timeout: int = 5,\n *,\n follow_redirects: bool = True,\n save_to_file: bool = False,\n include_httpx_metadata: bool = False,\n ) -> Data:\n method = method.upper()\n if method not in {\"GET\", \"POST\", \"PATCH\", \"PUT\", \"DELETE\"}:\n msg = f\"Unsupported method: {method}\"\n raise ValueError(msg)\n\n processed_body = self._process_body(body)\n redirection_history = []\n\n try:\n # Prepare request parameters\n request_params = {\n \"method\": method,\n \"url\": url,\n \"headers\": headers,\n \"json\": processed_body,\n \"timeout\": timeout,\n \"follow_redirects\": follow_redirects,\n }\n response = await client.request(**request_params)\n\n redirection_history = [\n {\n \"url\": redirect.headers.get(\"Location\", str(redirect.url)),\n \"status_code\": redirect.status_code,\n }\n for redirect in response.history\n ]\n\n is_binary, file_path = await self._response_info(response, with_file_path=save_to_file)\n response_headers = self._headers_to_dict(response.headers)\n\n # Base metadata\n metadata = {\n \"source\": url,\n \"status_code\": response.status_code,\n \"response_headers\": response_headers,\n }\n\n if redirection_history:\n metadata[\"redirection_history\"] = redirection_history\n\n if save_to_file:\n mode = \"wb\" if is_binary else \"w\"\n encoding = response.encoding if mode == \"w\" else None\n if file_path:\n await aiofiles_os.makedirs(file_path.parent, exist_ok=True)\n if is_binary:\n async with aiofiles.open(file_path, \"wb\") as f:\n await f.write(response.content)\n await f.flush()\n else:\n async with aiofiles.open(file_path, \"w\", encoding=encoding) as f:\n await f.write(response.text)\n await f.flush()\n metadata[\"file_path\"] = str(file_path)\n\n if include_httpx_metadata:\n metadata.update({\"headers\": headers})\n return Data(data=metadata)\n\n # Handle response content\n if is_binary:\n result = response.content\n else:\n try:\n result = response.json()\n except json.JSONDecodeError:\n self.log(\"Failed to decode JSON response\")\n result = response.text.encode(\"utf-8\")\n\n metadata[\"result\"] = result\n\n if include_httpx_metadata:\n metadata.update({\"headers\": headers})\n\n return Data(data=metadata)\n except (httpx.HTTPError, httpx.RequestError, httpx.TimeoutException) as exc:\n self.log(f\"Error making request to {url}\")\n return Data(\n data={\n \"source\": url,\n \"headers\": headers,\n \"status_code\": 500,\n \"error\": str(exc),\n **({\"redirection_history\": redirection_history} if redirection_history else {}),\n },\n )\n\n def add_query_params(self, url: str, params: dict) -> str:\n \"\"\"Add query parameters to URL efficiently.\"\"\"\n if not params:\n return url\n url_parts = list(urlparse(url))\n query = dict(parse_qsl(url_parts[4]))\n query.update(params)\n url_parts[4] = urlencode(query)\n return urlunparse(url_parts)\n\n def _headers_to_dict(self, headers: httpx.Headers) -> dict[str, str]:\n \"\"\"Convert HTTP 
headers to a dictionary with lowercased keys.\"\"\"\n return {k.lower(): v for k, v in headers.items()}\n\n def _process_headers(self, headers: Any) -> dict:\n \"\"\"Process the headers input into a valid dictionary.\"\"\"\n if headers is None:\n return {}\n if isinstance(headers, dict):\n return headers\n if isinstance(headers, list):\n return {item[\"key\"]: item[\"value\"] for item in headers if self._is_valid_key_value_item(item)}\n return {}\n\n async def make_api_request(self) -> Data:\n \"\"\"Make HTTP request with optimized parameter handling.\"\"\"\n method = self.method\n url = self.url_input.strip() if isinstance(self.url_input, str) else \"\"\n headers = self.headers or {}\n body = self.body or {}\n timeout = self.timeout\n follow_redirects = self.follow_redirects\n save_to_file = self.save_to_file\n include_httpx_metadata = self.include_httpx_metadata\n\n # if self.mode == \"cURL\" and self.curl_input:\n # self._build_config = self.parse_curl(self.curl_input, dotdict())\n # # After parsing curl, get the normalized URL\n # url = self._build_config[\"url_input\"][\"value\"]\n\n # Normalize URL before validation\n url = self._normalize_url(url)\n\n # Validate URL\n if not validators.url(url):\n msg = f\"Invalid URL provided: {url}\"\n raise ValueError(msg)\n\n # Process query parameters\n if isinstance(self.query_params, str):\n query_params = dict(parse_qsl(self.query_params))\n else:\n query_params = self.query_params.data if self.query_params else {}\n\n # Process headers and body\n headers = self._process_headers(headers)\n body = self._process_body(body)\n url = self.add_query_params(url, query_params)\n\n async with httpx.AsyncClient() as client:\n result = await self.make_request(\n client,\n method,\n url,\n headers,\n body,\n timeout,\n follow_redirects=follow_redirects,\n save_to_file=save_to_file,\n include_httpx_metadata=include_httpx_metadata,\n )\n self.status = result\n return result\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:\n \"\"\"Update the build config based on the selected mode.\"\"\"\n if field_name != \"mode\":\n if field_name == \"curl_input\" and self.mode == \"cURL\" and self.curl_input:\n return self.parse_curl(self.curl_input, build_config)\n return build_config\n\n # print(f\"Current mode: {field_value}\")\n if field_value == \"cURL\":\n set_field_display(build_config, \"curl_input\", value=True)\n if build_config[\"curl_input\"][\"value\"]:\n build_config = self.parse_curl(build_config[\"curl_input\"][\"value\"], build_config)\n else:\n set_field_display(build_config, \"curl_input\", value=False)\n\n return set_current_fields(\n build_config=build_config,\n action_fields=MODE_FIELDS,\n selected_action=field_value,\n default_fields=DEFAULT_FIELDS,\n func=set_field_advanced,\n default_value=True,\n )\n\n async def _response_info(\n self, response: httpx.Response, *, with_file_path: bool = False\n ) -> tuple[bool, Path | None]:\n \"\"\"Determine the file path and whether the response content is binary.\n\n Args:\n response (Response): The HTTP response object.\n with_file_path (bool): Whether to save the response content to a file.\n\n Returns:\n Tuple[bool, Path | None]:\n A tuple containing a boolean indicating if the content is binary and the full file path (if applicable).\n \"\"\"\n content_type = response.headers.get(\"Content-Type\", \"\")\n is_binary = \"application/octet-stream\" in content_type or \"application/binary\" in content_type\n\n if not with_file_path:\n 
return is_binary, None\n\n component_temp_dir = Path(tempfile.gettempdir()) / self.__class__.__name__\n\n # Create directory asynchronously\n await aiofiles_os.makedirs(component_temp_dir, exist_ok=True)\n\n filename = None\n if \"Content-Disposition\" in response.headers:\n content_disposition = response.headers[\"Content-Disposition\"]\n filename_match = re.search(r'filename=\"(.+?)\"', content_disposition)\n if filename_match:\n extracted_filename = filename_match.group(1)\n filename = extracted_filename\n\n # Step 3: Infer file extension or use part of the request URL if no filename\n if not filename:\n # Extract the last segment of the URL path\n url_path = urlparse(str(response.request.url) if response.request else \"\").path\n base_name = Path(url_path).name # Get the last segment of the path\n if not base_name: # If the path ends with a slash or is empty\n base_name = \"response\"\n\n # Infer file extension\n content_type_to_extension = {\n \"text/plain\": \".txt\",\n \"application/json\": \".json\",\n \"image/jpeg\": \".jpg\",\n \"image/png\": \".png\",\n \"application/octet-stream\": \".bin\",\n }\n extension = content_type_to_extension.get(content_type, \".bin\" if is_binary else \".txt\")\n filename = f\"{base_name}{extension}\"\n\n # Step 4: Define the full file path\n file_path = component_temp_dir / filename\n\n # Step 5: Check if file exists asynchronously and handle accordingly\n try:\n # Try to create the file exclusively (x mode) to check existence\n async with aiofiles.open(file_path, \"x\") as _:\n pass # File created successfully, we can use this path\n except FileExistsError:\n # If file exists, append a timestamp to the filename\n timestamp = datetime.now(timezone.utc).strftime(\"%Y%m%d%H%M%S%f\")\n file_path = component_temp_dir / f\"{timestamp}-{filename}\"\n\n return is_binary, file_path\n" }, "curl_input": { "_input_type": "MultilineInput", diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index 13a06f8ee986..bf32f1679639 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -876,7 +876,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema import Data, DataFrame\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data or DataFrame\",\n info=\"The data with texts to split in chunks.\",\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=(\n \"The maximum length of each chunk. Text is first split by separator, \"\n \"then chunks are merged up to this size. 
\"\n \"Individual splits larger than this won't be further divided.\"\n ),\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=(\n \"The character to split on. Use \\\\n for newline. \"\n \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . for sentences\"\n ),\n value=\"\\n\",\n ),\n MessageTextInput(\n name=\"text_key\",\n display_name=\"Text Key\",\n info=\"The key to use for the text column.\",\n value=\"text\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keep_separator\",\n display_name=\"Keep Separator\",\n info=\"Whether to keep the separator in the output chunks and where to place it.\",\n options=[\"False\", \"True\", \"Start\", \"End\"],\n value=\"False\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"dataframe\", method=\"split_text\"),\n ]\n\n def _docs_to_data(self, docs) -> list[Data]:\n return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n\n def _fix_separator(self, separator: str) -> str:\n \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n if separator == \"/n\":\n return \"\\n\"\n if separator == \"/t\":\n return \"\\t\"\n return separator\n\n def split_text_base(self):\n separator = self._fix_separator(self.separator)\n separator = unescape_string(separator)\n\n if isinstance(self.data_inputs, DataFrame):\n if not len(self.data_inputs):\n msg = \"DataFrame is empty\"\n raise TypeError(msg)\n\n self.data_inputs.text_key = self.text_key\n try:\n documents = self.data_inputs.to_lc_documents()\n except Exception as e:\n msg = f\"Error converting DataFrame to documents: {e}\"\n raise TypeError(msg) from e\n else:\n if not self.data_inputs:\n msg = \"No data inputs provided\"\n raise TypeError(msg)\n\n documents = []\n if isinstance(self.data_inputs, Data):\n self.data_inputs.text_key = self.text_key\n documents = [self.data_inputs.to_lc_document()]\n else:\n try:\n documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n if not documents:\n msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n raise TypeError(msg)\n except AttributeError as e:\n msg = f\"Invalid input type in collection: {e}\"\n raise TypeError(msg) from e\n try:\n # Convert string 'False'/'True' to boolean\n keep_sep = self.keep_separator\n if isinstance(keep_sep, str):\n if keep_sep.lower() == \"false\":\n keep_sep = False\n elif keep_sep.lower() == \"true\":\n keep_sep = True\n # 'start' and 'end' are kept as strings\n\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n keep_separator=keep_sep,\n )\n return splitter.split_documents(documents)\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n def split_text(self) -> DataFrame:\n return DataFrame(self._docs_to_data(self.split_text_base()))\n" + "value": "from langchain_text_splitters import CharacterTextSplitter\n\nfrom langflow.custom.custom_component.component import Component\nfrom langflow.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output\nfrom langflow.schema.data import Data\nfrom langflow.schema.dataframe import DataFrame\nfrom langflow.utils.util import unescape_string\n\n\nclass SplitTextComponent(Component):\n display_name: str = \"Split Text\"\n description: str = \"Split text into chunks based on specified criteria.\"\n icon = \"scissors-line-dashed\"\n name = \"SplitText\"\n\n inputs = [\n HandleInput(\n 
name=\"data_inputs\",\n display_name=\"Data or DataFrame\",\n info=\"The data with texts to split in chunks.\",\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n IntInput(\n name=\"chunk_overlap\",\n display_name=\"Chunk Overlap\",\n info=\"Number of characters to overlap between chunks.\",\n value=200,\n ),\n IntInput(\n name=\"chunk_size\",\n display_name=\"Chunk Size\",\n info=(\n \"The maximum length of each chunk. Text is first split by separator, \"\n \"then chunks are merged up to this size. \"\n \"Individual splits larger than this won't be further divided.\"\n ),\n value=1000,\n ),\n MessageTextInput(\n name=\"separator\",\n display_name=\"Separator\",\n info=(\n \"The character to split on. Use \\\\n for newline. \"\n \"Examples: \\\\n\\\\n for paragraphs, \\\\n for lines, . for sentences\"\n ),\n value=\"\\n\",\n ),\n MessageTextInput(\n name=\"text_key\",\n display_name=\"Text Key\",\n info=\"The key to use for the text column.\",\n value=\"text\",\n advanced=True,\n ),\n DropdownInput(\n name=\"keep_separator\",\n display_name=\"Keep Separator\",\n info=\"Whether to keep the separator in the output chunks and where to place it.\",\n options=[\"False\", \"True\", \"Start\", \"End\"],\n value=\"False\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Chunks\", name=\"dataframe\", method=\"split_text\"),\n ]\n\n def _docs_to_data(self, docs) -> list[Data]:\n return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]\n\n def _fix_separator(self, separator: str) -> str:\n \"\"\"Fix common separator issues and convert to proper format.\"\"\"\n if separator == \"/n\":\n return \"\\n\"\n if separator == \"/t\":\n return \"\\t\"\n return separator\n\n def split_text_base(self):\n separator = self._fix_separator(self.separator)\n separator = unescape_string(separator)\n\n if isinstance(self.data_inputs, DataFrame):\n if not len(self.data_inputs):\n msg = \"DataFrame is empty\"\n raise TypeError(msg)\n\n self.data_inputs.text_key = self.text_key\n try:\n documents = self.data_inputs.to_lc_documents()\n except Exception as e:\n msg = f\"Error converting DataFrame to documents: {e}\"\n raise TypeError(msg) from e\n else:\n if not self.data_inputs:\n msg = \"No data inputs provided\"\n raise TypeError(msg)\n\n documents = []\n if isinstance(self.data_inputs, Data):\n self.data_inputs.text_key = self.text_key\n documents = [self.data_inputs.to_lc_document()]\n else:\n try:\n documents = [input_.to_lc_document() for input_ in self.data_inputs if isinstance(input_, Data)]\n if not documents:\n msg = f\"No valid Data inputs found in {type(self.data_inputs)}\"\n raise TypeError(msg)\n except AttributeError as e:\n msg = f\"Invalid input type in collection: {e}\"\n raise TypeError(msg) from e\n try:\n # Convert string 'False'/'True' to boolean\n keep_sep = self.keep_separator\n if isinstance(keep_sep, str):\n if keep_sep.lower() == \"false\":\n keep_sep = False\n elif keep_sep.lower() == \"true\":\n keep_sep = True\n # 'start' and 'end' are kept as strings\n\n splitter = CharacterTextSplitter(\n chunk_overlap=self.chunk_overlap,\n chunk_size=self.chunk_size,\n separator=separator,\n keep_separator=keep_sep,\n )\n return splitter.split_documents(documents)\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n def split_text(self) -> DataFrame:\n return DataFrame(self._docs_to_data(self.split_text_base()))\n" }, "data_inputs": { "advanced": false, From 1c658f58b2ea82a2028e87115f7fdff1d276b083 Mon Sep 17 
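
[PATCH 6/8] does more than re-sync the embedded code: it also swaps the aggregate package imports for fully qualified module paths in both embedded components, presumably to avoid importing the whole langflow.custom / langflow.schema package __init__ at load time. The before/after, taken from the two "value" fields above:

# Before (package-level re-exports):
#   from langflow.custom import Component
#   from langflow.schema import Data, DataFrame
# After (fully qualified module paths):
from langflow.custom.custom_component.component import Component
from langflow.schema.data import Data
from langflow.schema.dataframe import DataFrame

The large rename in the patch that follows is mechanical: every node ID in the flow was regenerated (ChatInput-FzOTA becomes ChatInput-kNQkx, and so on), and because edge IDs and handle strings serialize the endpoint descriptors, each rename rewrites every edge touching that node. The œ character appears to stand in for double quotes in those serialized handles; a sketch of that encoding as inferred from the JSON (encode_handle is illustrative, not Langflow's actual helper):

import json

def encode_handle(handle: dict) -> str:
    # JSON-encode the handle, then substitute œ for the double quotes, matching
    # the sourceHandle/targetHandle strings in the diff that follows.
    return json.dumps(handle).replace('"', "œ")

print(encode_handle({
    "dataType": "ChatInput",
    "id": "ChatInput-kNQkx",
    "name": "message",
    "output_types": ["Message"],
}))
# -> {œdataTypeœ: œChatInputœ, œidœ: œChatInput-kNQkxœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}
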
00:00:00 2001 From: cristhianzl Date: Tue, 10 Jun 2025 16:04:15 -0300 Subject: [PATCH 7/8] fix tests --- .../starter_projects/Vector Store RAG.json | 332 +++++++++--------- .../tests/core/features/freeze.spec.ts | 12 +- 2 files changed, 171 insertions(+), 173 deletions(-) diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json index bf32f1679639..34a68c06ad72 100644 --- a/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json +++ b/src/backend/base/langflow/initial_setup/starter_projects/Vector Store RAG.json @@ -7,7 +7,7 @@ "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-FzOTA", + "id": "ChatInput-kNQkx", "name": "message", "output_types": [ "Message" @@ -15,7 +15,7 @@ }, "targetHandle": { "fieldName": "question", - "id": "Prompt-kr3Rx", + "id": "Prompt-zHQI0", "inputTypes": [ "Message", "Text" @@ -23,39 +23,12 @@ "type": "str" } }, - "id": "reactflow__edge-ChatInput-FzOTA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-FzOTAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-kr3Rx{œfieldNameœ:œquestionœ,œidœ:œPrompt-kr3Rxœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-ChatInput-kNQkx{œdataTypeœ:œChatInputœ,œidœ:œChatInput-kNQkxœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-Prompt-zHQI0{œfieldNameœ:œquestionœ,œidœ:œPrompt-zHQI0œ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, - "source": "ChatInput-FzOTA", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-FzOTAœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-kr3Rx", - "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-kr3Rxœ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "File", - "id": "File-EO8pn", - "name": "data", - "output_types": [] - }, - "targetHandle": { - "fieldName": "data_inputs", - "id": "SplitText-aHhAi", - "inputTypes": [ - "Data", - "DataFrame" - ], - "type": "other" - } - }, - "id": "reactflow__edge-File-CBftc{œdataTypeœ:œFileœ,œidœ:œFile-CBftcœ,œnameœ:œdataœ,œoutput_typesœ:[œDataœ]}-SplitText-gIoap{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-gIoapœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", - "selected": false, - "source": "File-EO8pn", - "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-EO8pnœ, œnameœ: œdataœ, œoutput_typesœ: []}", - "target": "SplitText-aHhAi", - "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-aHhAiœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" + "source": "ChatInput-kNQkx", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-kNQkxœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-zHQI0", + "targetHandle": "{œfieldNameœ: œquestionœ, œidœ: œPrompt-zHQI0œ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -63,7 +36,7 @@ "data": { "sourceHandle": { "dataType": "Prompt", - "id": "Prompt-kr3Rx", + "id": "Prompt-zHQI0", "name": "prompt", "output_types": [ "Message" @@ -71,19 +44,19 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "OpenAIModel-7W8gE", + "id": "OpenAIModel-9bWp2", "inputTypes": [ "Message" ], "type": "str" } }, - "id": "reactflow__edge-Prompt-kr3Rx{œdataTypeœ:œPromptœ,œidœ:œPrompt-kr3Rxœ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OpenAIModel-Ej17f{œfieldNameœ:œinput_valueœ,œidœ:œOpenAIModel-Ej17fœ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", + "id": 
"reactflow__edge-Prompt-zHQI0{œdataTypeœ:œPromptœ,œidœ:œPrompt-zHQI0œ,œnameœ:œpromptœ,œoutput_typesœ:[œMessageœ]}-OpenAIModel-9bWp2{œfieldNameœ:œinput_valueœ,œidœ:œOpenAIModel-9bWp2œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "Prompt-kr3Rx", - "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-kr3Rxœ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", - "target": "OpenAIModel-Ej17f", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œOpenAIModel-7W8gEœ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" + "source": "Prompt-zHQI0", + "sourceHandle": "{œdataTypeœ: œPromptœ, œidœ: œPrompt-zHQI0œ, œnameœ: œpromptœ, œoutput_typesœ: [œMessageœ]}", + "target": "OpenAIModel-9bWp2", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œOpenAIModel-9bWp2œ, œinputTypesœ: [œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -91,7 +64,7 @@ "data": { "sourceHandle": { "dataType": "OpenAIModel", - "id": "OpenAIModel-Ej17f", + "id": "OpenAIModel-9bWp2", "name": "text_output", "output_types": [ "Message" @@ -99,7 +72,7 @@ }, "targetHandle": { "fieldName": "input_value", - "id": "ChatOutput-mbLiD", + "id": "ChatOutput-GAFHg", "inputTypes": [ "Data", "DataFrame", @@ -108,12 +81,12 @@ "type": "str" } }, - "id": "reactflow__edge-OpenAIModel-Ej17f{œdataTypeœ:œOpenAIModelœ,œidœ:œOpenAIModel-Ej17fœ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-nGc6Z{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-nGc6Zœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-OpenAIModel-9bWp2{œdataTypeœ:œOpenAIModelœ,œidœ:œOpenAIModel-9bWp2œ,œnameœ:œtext_outputœ,œoutput_typesœ:[œMessageœ]}-ChatOutput-GAFHg{œfieldNameœ:œinput_valueœ,œidœ:œChatOutput-GAFHgœ,œinputTypesœ:[œDataœ,œDataFrameœ,œMessageœ],œtypeœ:œstrœ}", "selected": false, - "source": "OpenAIModel-Ej17f", - "sourceHandle": "{œdataTypeœ: œOpenAIModelœ, œidœ: œOpenAIModel-Ej17fœ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", - "target": "ChatOutput-nGc6Z", - "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-mbLiDœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}" + "source": "OpenAIModel-9bWp2", + "sourceHandle": "{œdataTypeœ: œOpenAIModelœ, œidœ: œOpenAIModel-9bWp2œ, œnameœ: œtext_outputœ, œoutput_typesœ: [œMessageœ]}", + "target": "ChatOutput-GAFHg", + "targetHandle": "{œfieldNameœ: œinput_valueœ, œidœ: œChatOutput-GAFHgœ, œinputTypesœ: [œDataœ, œDataFrameœ, œMessageœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -121,7 +94,7 @@ "data": { "sourceHandle": { "dataType": "parser", - "id": "parser-YIJGN", + "id": "parser-Qet8H", "name": "parsed_text", "output_types": [ "Message" @@ -129,7 +102,7 @@ }, "targetHandle": { "fieldName": "context", - "id": "Prompt-dcKE8", + "id": "Prompt-zHQI0", "inputTypes": [ "Message", "Text" @@ -137,39 +110,12 @@ "type": "str" } }, - "id": "reactflow__edge-parser-YIJGN{œdataTypeœ:œparserœ,œidœ:œparser-YIJGNœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-kr3Rx{œfieldNameœ:œcontextœ,œidœ:œPrompt-kr3Rxœ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", + "id": "reactflow__edge-parser-Qet8H{œdataTypeœ:œparserœ,œidœ:œparser-Qet8Hœ,œnameœ:œparsed_textœ,œoutput_typesœ:[œMessageœ]}-Prompt-zHQI0{œfieldNameœ:œcontextœ,œidœ:œPrompt-zHQI0œ,œinputTypesœ:[œMessageœ,œTextœ],œtypeœ:œstrœ}", "selected": false, - "source": "parser-YIJGN", - "sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-YIJGNœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", - "target": "Prompt-kr3Rx", - "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-dcKE8œ, 
œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" - }, - { - "animated": false, - "className": "", - "data": { - "sourceHandle": { - "dataType": "SplitText", - "id": "SplitText-aHhAi", - "name": "chunks", - "output_types": [] - }, - "targetHandle": { - "fieldName": "ingest_data", - "id": "AstraDB-xD6ep", - "inputTypes": [ - "Data", - "DataFrame" - ], - "type": "other" - } - }, - "id": "reactflow__edge-SplitText-aHhAi{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-aHhAiœ,œnameœ:œchunksœ,œoutput_typesœ:[œDataœ]}-AstraDB-lXzoG{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-lXzoGœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", - "selected": false, - "source": "SplitText-aHhAi", - "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-aHhAiœ, œnameœ: œchunksœ, œoutput_typesœ: []}", - "target": "AstraDB-lXzoG", - "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-xD6epœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" + "source": "parser-Qet8H", + "sourceHandle": "{œdataTypeœ: œparserœ, œidœ: œparser-Qet8Hœ, œnameœ: œparsed_textœ, œoutput_typesœ: [œMessageœ]}", + "target": "Prompt-zHQI0", + "targetHandle": "{œfieldNameœ: œcontextœ, œidœ: œPrompt-zHQI0œ, œinputTypesœ: [œMessageœ, œTextœ], œtypeœ: œstrœ}" }, { "animated": false, @@ -177,7 +123,7 @@ "data": { "sourceHandle": { "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-tSZ8A", + "id": "OpenAIEmbeddings-D1jSt", "name": "embeddings", "output_types": [ "Embeddings" @@ -185,19 +131,19 @@ }, "targetHandle": { "fieldName": "embedding_model", - "id": "AstraDB-xD6ep", + "id": "AstraDB-eQaxM", "inputTypes": [ "Embeddings" ], "type": "other" } }, - "id": "reactflow__edge-OpenAIEmbeddings-tSZ8A{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-tSZ8Aœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-lXzoG{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-lXzoGœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-OpenAIEmbeddings-D1jSt{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-D1jStœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-eQaxM{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-eQaxMœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", "selected": false, - "source": "OpenAIEmbeddings-tSZ8A", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-tSZ8Aœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-lXzoG", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-xD6epœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" + "source": "OpenAIEmbeddings-D1jSt", + "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-D1jStœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", + "target": "AstraDB-eQaxM", + "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-eQaxMœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -205,7 +151,7 @@ "data": { "sourceHandle": { "dataType": "OpenAIEmbeddings", - "id": "OpenAIEmbeddings-M2xTe", + "id": "OpenAIEmbeddings-4Uky4", "name": "embeddings", "output_types": [ "Embeddings" @@ -213,19 +159,19 @@ }, "targetHandle": { "fieldName": "embedding_model", - "id": "AstraDB-PTTd1", + "id": "AstraDB-tVkFw", "inputTypes": [ "Embeddings" ], "type": "other" } }, - "id": "reactflow__edge-OpenAIEmbeddings-M2xTe{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-M2xTeœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-BRnBB{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-BRnBBœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", + "id": 
"reactflow__edge-OpenAIEmbeddings-4Uky4{œdataTypeœ:œOpenAIEmbeddingsœ,œidœ:œOpenAIEmbeddings-4Uky4œ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}-AstraDB-tVkFw{œfieldNameœ:œembedding_modelœ,œidœ:œAstraDB-tVkFwœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}", "selected": false, - "source": "OpenAIEmbeddings-M2xTe", - "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-M2xTeœ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", - "target": "AstraDB-BRnBB", - "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-PTTd1œ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" + "source": "OpenAIEmbeddings-4Uky4", + "sourceHandle": "{œdataTypeœ: œOpenAIEmbeddingsœ, œidœ: œOpenAIEmbeddings-4Uky4œ, œnameœ: œembeddingsœ, œoutput_typesœ: [œEmbeddingsœ]}", + "target": "AstraDB-tVkFw", + "targetHandle": "{œfieldNameœ: œembedding_modelœ, œidœ: œAstraDB-tVkFwœ, œinputTypesœ: [œEmbeddingsœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -233,7 +179,7 @@ "data": { "sourceHandle": { "dataType": "ChatInput", - "id": "ChatInput-FzOTA", + "id": "ChatInput-kNQkx", "name": "message", "output_types": [ "Message" @@ -241,19 +187,19 @@ }, "targetHandle": { "fieldName": "search_query", - "id": "AstraDB-PTTd1", + "id": "AstraDB-tVkFw", "inputTypes": [ "Message" ], "type": "query" } }, - "id": "reactflow__edge-ChatInput-FzOTA{œdataTypeœ:œChatInputœ,œidœ:œChatInput-FzOTAœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-BRnBB{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-BRnBBœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", + "id": "reactflow__edge-ChatInput-kNQkx{œdataTypeœ:œChatInputœ,œidœ:œChatInput-kNQkxœ,œnameœ:œmessageœ,œoutput_typesœ:[œMessageœ]}-AstraDB-tVkFw{œfieldNameœ:œsearch_queryœ,œidœ:œAstraDB-tVkFwœ,œinputTypesœ:[œMessageœ],œtypeœ:œqueryœ}", "selected": false, - "source": "ChatInput-FzOTA", - "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-FzOTAœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", - "target": "AstraDB-BRnBB", - "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-PTTd1œ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}" + "source": "ChatInput-kNQkx", + "sourceHandle": "{œdataTypeœ: œChatInputœ, œidœ: œChatInput-kNQkxœ, œnameœ: œmessageœ, œoutput_typesœ: [œMessageœ]}", + "target": "AstraDB-tVkFw", + "targetHandle": "{œfieldNameœ: œsearch_queryœ, œidœ: œAstraDB-tVkFwœ, œinputTypesœ: [œMessageœ], œtypeœ: œqueryœ}" }, { "animated": false, @@ -261,7 +207,7 @@ "data": { "sourceHandle": { "dataType": "AstraDB", - "id": "AstraDB-BRnBB", + "id": "AstraDB-tVkFw", "name": "dataframe", "output_types": [ "DataFrame" @@ -269,7 +215,7 @@ }, "targetHandle": { "fieldName": "input_data", - "id": "parser-l9sAS", + "id": "parser-Qet8H", "inputTypes": [ "DataFrame", "Data" @@ -277,12 +223,12 @@ "type": "other" } }, - "id": "reactflow__edge-AstraDB-BRnBB{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-BRnBBœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-parser-YIJGN{œfieldNameœ:œinput_dataœ,œidœ:œparser-YIJGNœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-AstraDB-tVkFw{œdataTypeœ:œAstraDBœ,œidœ:œAstraDB-tVkFwœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-parser-Qet8H{œfieldNameœ:œinput_dataœ,œidœ:œparser-Qet8Hœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", "selected": false, - "source": "AstraDB-BRnBB", - "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-BRnBBœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", - "target": "parser-YIJGN", - "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œparser-l9sASœ, œinputTypesœ: 
[œDataFrameœ, œDataœ], œtypeœ: œotherœ}" + "source": "AstraDB-tVkFw", + "sourceHandle": "{œdataTypeœ: œAstraDBœ, œidœ: œAstraDB-tVkFwœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", + "target": "parser-Qet8H", + "targetHandle": "{œfieldNameœ: œinput_dataœ, œidœ: œparser-Qet8Hœ, œinputTypesœ: [œDataFrameœ, œDataœ], œtypeœ: œotherœ}" }, { "animated": false, @@ -290,7 +236,7 @@ "data": { "sourceHandle": { "dataType": "File", - "id": "File-EO8pn", + "id": "File-kPRpn", "name": "dataframe", "output_types": [ "DataFrame" @@ -298,7 +244,7 @@ }, "targetHandle": { "fieldName": "data_inputs", - "id": "SplitText-aHhAi", + "id": "SplitText-sDxql", "inputTypes": [ "Data", "DataFrame" @@ -306,12 +252,38 @@ "type": "other" } }, - "id": "xy-edge__File-EO8pn{œdataTypeœ:œFileœ,œidœ:œFile-EO8pnœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-SplitText-aHhAi{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-aHhAiœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", + "id": "reactflow__edge-File-kPRpn{œdataTypeœ:œFileœ,œidœ:œFile-kPRpnœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-SplitText-sDxql{œfieldNameœ:œdata_inputsœ,œidœ:œSplitText-sDxqlœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", "selected": false, - "source": "File-EO8pn", - "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-EO8pnœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", - "target": "SplitText-aHhAi", - "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-aHhAiœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" + "source": "File-kPRpn", + "sourceHandle": "{œdataTypeœ: œFileœ, œidœ: œFile-kPRpnœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", + "target": "SplitText-sDxql", + "targetHandle": "{œfieldNameœ: œdata_inputsœ, œidœ: œSplitText-sDxqlœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" + }, + { + "data": { + "sourceHandle": { + "dataType": "SplitText", + "id": "SplitText-sDxql", + "name": "dataframe", + "output_types": [ + "DataFrame" + ] + }, + "targetHandle": { + "fieldName": "ingest_data", + "id": "AstraDB-eQaxM", + "inputTypes": [ + "Data", + "DataFrame" + ], + "type": "other" + } + }, + "id": "xy-edge__SplitText-sDxql{œdataTypeœ:œSplitTextœ,œidœ:œSplitText-sDxqlœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-AstraDB-eQaxM{œfieldNameœ:œingest_dataœ,œidœ:œAstraDB-eQaxMœ,œinputTypesœ:[œDataœ,œDataFrameœ],œtypeœ:œotherœ}", + "source": "SplitText-sDxql", + "sourceHandle": "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-sDxqlœ, œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}", + "target": "AstraDB-eQaxM", + "targetHandle": "{œfieldNameœ: œingest_dataœ, œidœ: œAstraDB-eQaxMœ, œinputTypesœ: [œDataœ, œDataFrameœ], œtypeœ: œotherœ}" } ], "nodes": [ @@ -319,7 +291,7 @@ "data": { "description": "Get chat inputs from the Playground.", "display_name": "Chat Input", - "id": "ChatInput-FzOTA", + "id": "ChatInput-kNQkx", "node": { "base_classes": [ "Message" @@ -585,7 +557,7 @@ }, "dragging": false, "height": 234, - "id": "ChatInput-FzOTA", + "id": "ChatInput-kNQkx", "measured": { "height": 234, "width": 320 @@ -606,7 +578,7 @@ "data": { "description": "Create a prompt template with dynamic variables.", "display_name": "Prompt", - "id": "Prompt-kr3Rx", + "id": "Prompt-zHQI0", "node": { "base_classes": [ "Message" @@ -766,7 +738,7 @@ }, "dragging": false, "height": 433, - "id": "Prompt-kr3Rx", + "id": "Prompt-zHQI0", "measured": { "height": 433, "width": 320 @@ -787,7 +759,7 @@ "data": { "description": "Split text into chunks based on specified criteria.", "display_name": "Split Text", - "id": 
"SplitText-aHhAi", + "id": "SplitText-sDxql", "node": { "base_classes": [ "Data" @@ -971,7 +943,7 @@ }, "dragging": false, "height": 475, - "id": "SplitText-aHhAi", + "id": "SplitText-sDxql", "measured": { "height": 475, "width": 320 @@ -990,7 +962,7 @@ }, { "data": { - "id": "note-eJrcq", + "id": "note-VWeXf", "node": { "description": "## 🐕 2. Retriever Flow\n\nThis flow answers your questions with contextual data retrieved from your vector database.\n\nOpen the **Playground** and ask, \n\n```\nWhat is this document about?\n```\n", "display_name": "", @@ -1003,7 +975,7 @@ }, "dragging": false, "height": 324, - "id": "note-eJrcq", + "id": "note-VWeXf", "measured": { "height": 324, "width": 325 @@ -1027,7 +999,7 @@ }, { "data": { - "id": "note-oUrKA", + "id": "note-K46GL", "node": { "description": "## 📖 README\n\nLoad your data into a vector database with the 📚 **Load Data** flow, and then use your data as chat context with the 🐕 **Retriever** flow.\n\n**🚨 Add your OpenAI API key as a global variable to easily add it to all of the OpenAI components in this flow.** \n\n**Quick start**\n1. Run the 📚 **Load Data** flow.\n2. Run the 🐕 **Retriever** flow.\n\n**Next steps** \n\n- Experiment by changing the prompt and the loaded data to see how the bot's responses change. \n\nFor more info, see the [Langflow docs](https://docs.langflow.org/starter-projects-vector-store-rag).", "display_name": "Read Me", @@ -1040,7 +1012,7 @@ }, "dragging": false, "height": 324, - "id": "note-oUrKA", + "id": "note-K46GL", "measured": { "height": 324, "width": 325 @@ -1066,7 +1038,7 @@ "data": { "description": "Display a chat message in the Playground.", "display_name": "Chat Output", - "id": "ChatOutput-nGc6Z", + "id": "ChatOutput-GAFHg", "node": { "base_classes": [ "Message" @@ -1349,7 +1321,7 @@ }, "dragging": false, "height": 234, - "id": "ChatOutput-nGc6Z", + "id": "ChatOutput-GAFHg", "measured": { "height": 234, "width": 320 @@ -1368,7 +1340,7 @@ }, { "data": { - "id": "OpenAIEmbeddings-M2xTe", + "id": "OpenAIEmbeddings-4Uky4", "node": { "base_classes": [ "Embeddings" @@ -1664,7 +1636,7 @@ "show": true, "title_case": false, "type": "str", - "value": "OPENAI_API_KEY" + "value": "" }, "openai_api_type": { "_input_type": "MessageTextInput", @@ -1847,7 +1819,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-M2xTe", + "id": "OpenAIEmbeddings-4Uky4", "measured": { "height": 320, "width": 320 @@ -1860,13 +1832,13 @@ "x": 825.435626932521, "y": 739.6327999745448 }, - "selected": true, + "selected": false, "type": "genericNode", "width": 320 }, { "data": { - "id": "note-cYKfJ", + "id": "note-Jk7TI", "node": { "description": "## 📚 1. Load Data Flow\n\nRun this first! Load data from a local file and embed it into the vector database.\n\nSelect a Database and a Collection, or create new ones. 
\n\nClick ▶️ **Run component** on the **Astra DB** component to load your data.\n\n* If you're using OSS Langflow, add your Astra DB Application Token to the Astra DB component.\n\n#### Next steps:\n Experiment by changing the prompt and the contextual data to see how the retrieval flow's responses change.", "display_name": "", @@ -1879,7 +1851,7 @@ }, "dragging": false, "height": 324, - "id": "note-cYKfJ", + "id": "note-Jk7TI", "measured": { "height": 324, "width": 325 @@ -1903,7 +1875,7 @@ }, { "data": { - "id": "OpenAIEmbeddings-tSZ8A", + "id": "OpenAIEmbeddings-D1jSt", "node": { "base_classes": [ "Embeddings" @@ -2199,7 +2171,7 @@ "show": true, "title_case": false, "type": "str", - "value": "OPENAI_API_KEY" + "value": "" }, "openai_api_type": { "_input_type": "MessageTextInput", @@ -2382,7 +2354,7 @@ }, "dragging": false, "height": 320, - "id": "OpenAIEmbeddings-tSZ8A", + "id": "OpenAIEmbeddings-D1jSt", "measured": { "height": 320, "width": 320 @@ -2401,7 +2373,7 @@ }, { "data": { - "id": "File-EO8pn", + "id": "File-kPRpn", "node": { "base_classes": [ "Data" @@ -2648,7 +2620,7 @@ }, "dragging": false, "height": 367, - "id": "File-EO8pn", + "id": "File-kPRpn", "measured": { "height": 367, "width": 320 @@ -2667,7 +2639,7 @@ }, { "data": { - "id": "note-NsKYL", + "id": "note-mQAwf", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2680,7 +2652,7 @@ }, "dragging": false, "height": 324, - "id": "note-NsKYL", + "id": "note-mQAwf", "measured": { "height": 324, "width": 324 @@ -2699,7 +2671,7 @@ }, { "data": { - "id": "note-By1Lm", + "id": "note-6Bw7F", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2712,7 +2684,7 @@ }, "dragging": false, "height": 324, - "id": "note-By1Lm", + "id": "note-6Bw7F", "measured": { "height": 324, "width": 324 @@ -2731,7 +2703,7 @@ }, { "data": { - "id": "note-iSzAZ", + "id": "note-dVn2E", "node": { "description": "### 💡 Add your OpenAI API key here 👇", "display_name": "", @@ -2744,7 +2716,7 @@ }, "dragging": false, "height": 324, - "id": "note-iSzAZ", + "id": "note-dVn2E", "measured": { "height": 324, "width": 324 @@ -2763,7 +2735,7 @@ }, { "data": { - "id": "OpenAIModel-Ej17f", + "id": "OpenAIModel-9bWp2", "node": { "base_classes": [ "LanguageModel", @@ -3142,9 +3114,9 @@ "type": "OpenAIModel" }, "dragging": false, - "id": "OpenAIModel-Ej17f", + "id": "OpenAIModel-9bWp2", "measured": { - "height": 614, + "height": 540, "width": 320 }, "position": { @@ -3156,7 +3128,7 @@ }, { "data": { - "id": "parser-YIJGN", + "id": "parser-Qet8H", "node": { "base_classes": [ "Message" @@ -3317,9 +3289,9 @@ "type": "parser" }, "dragging": false, - "id": "parser-YIJGN", + "id": "parser-Qet8H", "measured": { - "height": 395, + "height": 361, "width": 320 }, "position": { @@ -3331,7 +3303,7 @@ }, { "data": { - "id": "AstraDB-BRnBB", + "id": "AstraDB-tVkFw", "node": { "base_classes": [ "Data", @@ -3465,7 +3437,7 @@ "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "" + "value": "ASTRA_DB_API_ENDPOINT" }, "astradb_vectorstore_kwargs": { "_input_type": "NestedDictInput", @@ -3706,7 +3678,11 @@ "dynamic": false, "info": "Cloud provider for the new database.", "name": "cloud_provider", - "options": [], + "options": [ + "Amazon Web Services", + "Google Cloud Platform", + "Microsoft Azure" + ], "options_metadata": [], "placeholder": "", "real_time_refresh": true, @@ -3749,12 +3725,22 @@ "info": "The Database name for the Astra DB instance.", "name": "database_name", "options": [], - 
"options_metadata": [], + "options_metadata": [ + { + "api_endpoint": "https://deb10a81-3c5d-4fd3-8b1b-945915d2835b-us-east-2.apps.astra.datastax.com", + "collections": 1, + "keyspaces": [ + "default_keyspace" + ], + "org_id": "4bd8a5f9-41b3-4d8a-b039-0dd35f5eb374", + "status": null + } + ], "placeholder": "", "real_time_refresh": true, "refresh_button": true, "required": true, - "show": false, + "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, @@ -4079,9 +4065,9 @@ "type": "AstraDB" }, "dragging": false, - "id": "AstraDB-BRnBB", + "id": "AstraDB-tVkFw", "measured": { - "height": 449, + "height": 458, "width": 320 }, "position": { @@ -4093,7 +4079,7 @@ }, { "data": { - "id": "AstraDB-lXzoG", + "id": "AstraDB-eQaxM", "node": { "base_classes": [ "Data", @@ -4515,12 +4501,22 @@ "info": "The Database name for the Astra DB instance.", "name": "database_name", "options": [], - "options_metadata": [], + "options_metadata": [ + { + "api_endpoint": "https://deb10a81-3c5d-4fd3-8b1b-945915d2835b-us-east-2.apps.astra.datastax.com", + "collections": 1, + "keyspaces": [ + "default_keyspace" + ], + "org_id": "4bd8a5f9-41b3-4d8a-b039-0dd35f5eb374", + "status": null + } + ], "placeholder": "", "real_time_refresh": true, "refresh_button": true, "required": true, - "show": false, + "show": true, "title_case": false, "tool_mode": false, "trace_as_metadata": true, @@ -4836,7 +4832,7 @@ "show": true, "title_case": false, "type": "str", - "value": "ASTRA_DB_APPLICATION_TOKEN" + "value": "" } }, "tool_mode": false @@ -4845,9 +4841,9 @@ "type": "AstraDB" }, "dragging": false, - "id": "AstraDB-lXzoG", + "id": "AstraDB-eQaxM", "measured": { - "height": 449, + "height": 458, "width": 320 }, "position": { @@ -4859,9 +4855,9 @@ } ], "viewport": { - "x": 90.57560089396452, - "y": -149.7037806007536, - "zoom": 0.46276403161264995 + "x": 20.50191698112849, + "y": -144.65436276592914, + "zoom": 0.43295751491830675 } }, "description": "Load your data for chat context with Retrieval Augmented Generation.", diff --git a/src/frontend/tests/core/features/freeze.spec.ts b/src/frontend/tests/core/features/freeze.spec.ts index e35d40ef8d02..a7409db33afa 100644 --- a/src/frontend/tests/core/features/freeze.spec.ts +++ b/src/frontend/tests/core/features/freeze.spec.ts @@ -58,13 +58,13 @@ test( //fourth component await page.getByTestId("sidebar-search-input").click(); - await page.getByTestId("sidebar-search-input").fill("data to message"); - await page.waitForSelector('[data-testid="processingData to Message"]', { + await page.getByTestId("sidebar-search-input").fill("Parser"); + await page.waitForSelector('[data-testid="processingParser"]', { timeout: 1000, }); await page - .getByTestId("processingData to Message") + .getByTestId("processingParser") .dragTo(page.locator('//*[@id="react-flow-id"]'), { targetPosition: { x: 50, y: 300 }, }); @@ -133,11 +133,13 @@ test( .getByTestId("handle-splittext-shownode-chunks-right") .nth(0) .click(); - await page.getByTestId("handle-parsedata-shownode-data-left").click(); + await page + .getByTestId("handle-parsercomponent-shownode-data or dataframe-left") + .click(); //connection 4 await page - .getByTestId("handle-parsedata-shownode-message-right") + .getByTestId("handle-parsercomponent-shownode-parsed text-right") .nth(0) .click(); await page.getByTestId("handle-chatoutput-shownode-inputs-left").click(); From 3e093185bbbb98b771a25c3d783fa25dd4ee5d0e Mon Sep 17 00:00:00 2001 From: cristhianzl Date: Tue, 10 Jun 2025 16:04:47 -0300 Subject: [PATCH 8/8] add 
shards

---
 .github/workflows/typescript_test.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/typescript_test.yml b/.github/workflows/typescript_test.yml
index bb4039042571..580d3fcd2a80 100644
--- a/.github/workflows/typescript_test.yml
+++ b/.github/workflows/typescript_test.yml
@@ -220,12 +220,12 @@ jobs:
           echo "Total tests to run: $TEST_COUNT"

-          # Calculate optimal shard count - 1 shard per 5 tests, min 1, max 10
+          # Calculate optimal shard count - 1 shard per 5 tests, min 1, max 40
           SHARD_COUNT=$(( (TEST_COUNT + 4) / 5 ))
           if [ $SHARD_COUNT -lt 1 ]; then
             SHARD_COUNT=1
-          elif [ $SHARD_COUNT -gt 10 ]; then
-            SHARD_COUNT=10
+          elif [ $SHARD_COUNT -gt 40 ]; then
+            SHARD_COUNT=40
           fi

           # Create the matrix combinations string
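
For reference, the shard sizing in PATCH 8/8 is a ceiling division clamped to the range [1, 40]: one shard per five tests, never fewer than one shard, never more than forty. Below is a minimal Python sketch of the same arithmetic; the sample test counts are illustrative and not taken from any real run.

```python
def shard_count(test_count: int, tests_per_shard: int = 5, max_shards: int = 40) -> int:
    # Integer ceiling division, matching SHARD_COUNT=$(( (TEST_COUNT + 4) / 5 )).
    shards = (test_count + tests_per_shard - 1) // tests_per_shard
    # Clamp to [1, max_shards], matching the if/elif guards in the workflow.
    return max(1, min(shards, max_shards))

assert shard_count(1) == 1      # tiny suites still get one shard
assert shard_count(23) == 5     # ceil(23 / 5) = 5
assert shard_count(200) == 40   # lands exactly on the cap
assert shard_count(500) == 40   # larger suites are capped at 40 shards
```

Raising the cap from 10 to 40 lets large Playwright suites fan out across up to 40 parallel CI jobs rather than 10; the target of roughly five tests per shard is unchanged.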
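
A reading aid for the edge diffs in `Vector Store RAG.json` above: the `sourceHandle`, `targetHandle`, and edge `id` strings embed JSON in which the `œ` character appears to stand in for the double quote, so the handle stays legal inside an outer JSON string. Assuming that convention holds, a small sketch (the `decode_handle` helper is illustrative, not part of the repository) recovers the handle fields — handy for verifying that the SplitText → AstraDB edge now carries the `dataframe` output in place of the removed `chunks` output:

```python
import json

def decode_handle(handle: str) -> dict:
    # 'œ' looks to be Langflow's stand-in for '"' inside handle strings.
    return json.loads(handle.replace("œ", '"'))

source_handle = (
    "{œdataTypeœ: œSplitTextœ, œidœ: œSplitText-sDxqlœ, "
    "œnameœ: œdataframeœ, œoutput_typesœ: [œDataFrameœ]}"
)
handle = decode_handle(source_handle)
print(handle["name"], handle["output_types"])  # dataframe ['DataFrame']
```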