langflow-ai · rodrigosnader · Nov 17, 2025 · Nov 17, 2025 · Nov 17, 2025 · Nov 17, 2025
diff --git a/src/backend/base/langflow/initial_setup/starter_projects/Youtube Analysis.json b/src/backend/base/langflow/initial_setup/starter_projects/Youtube Analysis.json
@@ -520,7 +520,7 @@
             "legacy": false,
             "lf_version": "1.4.3",
             "metadata": {
-              "code_hash": "20398e0d18df",
+              "code_hash": "8c5296516f6c",
               "dependencies": {
                 "dependencies": [
                   {
@@ -595,7 +595,7 @@
                 "show": true,
                 "title_case": false,
                 "type": "code",
-                "value": "from contextlib import contextmanager\n\nimport pandas as pd\nfrom googleapiclient.discovery import build\nfrom googleapiclient.errors import HttpError\n\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, IntInput, MessageTextInput, SecretStrInput\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.template.field.base import Output\n\n\nclass YouTubeCommentsComponent(Component):\n    \"\"\"A component that retrieves comments from YouTube videos.\"\"\"\n\n    display_name: str = \"YouTube Comments\"\n    description: str = \"Retrieves and analyzes comments from YouTube videos.\"\n    icon: str = \"YouTube\"\n\n    # Constants\n    COMMENTS_DISABLED_STATUS = 403\n    NOT_FOUND_STATUS = 404\n    API_MAX_RESULTS = 100\n\n    inputs = [\n        MessageTextInput(\n            name=\"video_url\",\n            display_name=\"Video URL\",\n            info=\"The URL of the YouTube video to get comments from.\",\n            tool_mode=True,\n            required=True,\n        ),\n        SecretStrInput(\n            name=\"api_key\",\n            display_name=\"YouTube API Key\",\n            info=\"Your YouTube Data API key.\",\n            required=True,\n        ),\n        IntInput(\n            name=\"max_results\",\n            display_name=\"Max Results\",\n            value=20,\n            info=\"The maximum number of comments to return.\",\n        ),\n        DropdownInput(\n            name=\"sort_by\",\n            display_name=\"Sort By\",\n            options=[\"time\", \"relevance\"],\n            value=\"relevance\",\n            info=\"Sort comments by time or relevance.\",\n        ),\n        BoolInput(\n            name=\"include_replies\",\n            display_name=\"Include Replies\",\n            value=False,\n            info=\"Whether to include replies to comments.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"include_metrics\",\n            display_name=\"Include Metrics\",\n            value=True,\n            info=\"Include metrics like like count and reply count.\",\n            advanced=True,\n        ),\n    ]\n\n    outputs = [\n        Output(name=\"comments\", display_name=\"Comments\", method=\"get_video_comments\"),\n    ]\n\n    def _extract_video_id(self, video_url: str) -> str:\n        \"\"\"Extracts the video ID from a YouTube URL.\"\"\"\n        import re\n\n        patterns = [\n            r\"(?:youtube\\.com\\/watch\\?v=|youtu.be\\/|youtube.com\\/embed\\/)([^&\\n?#]+)\",\n            r\"youtube.com\\/shorts\\/([^&\\n?#]+)\",\n        ]\n\n        for pattern in patterns:\n            match = re.search(pattern, video_url)\n            if match:\n                return match.group(1)\n\n        return video_url.strip()\n\n    def _process_reply(self, reply: dict, parent_id: str, *, include_metrics: bool = True) -> dict:\n        \"\"\"Process a single reply comment.\"\"\"\n        reply_snippet = reply[\"snippet\"]\n        reply_data = {\n            \"comment_id\": reply[\"id\"],\n            \"parent_comment_id\": parent_id,\n            \"author\": reply_snippet[\"authorDisplayName\"],\n            \"text\": reply_snippet[\"textDisplay\"],\n            \"published_at\": reply_snippet[\"publishedAt\"],\n            \"is_reply\": True,\n        }\n        if include_metrics:\n            reply_data[\"like_count\"] = reply_snippet[\"likeCount\"]\n            reply_data[\"reply_count\"] = 0  # Replies can't have replies\n\n        return reply_data\n\n    def _process_comment(\n        self, item: dict, *, include_metrics: bool = True, include_replies: bool = False\n    ) -> list[dict]:\n        \"\"\"Process a single comment thread.\"\"\"\n        comment = item[\"snippet\"][\"topLevelComment\"][\"snippet\"]\n        comment_id = item[\"snippet\"][\"topLevelComment\"][\"id\"]\n\n        # Basic comment data\n        processed_comments = [\n            {\n                \"comment_id\": comment_id,\n                \"parent_comment_id\": \"\",  # Empty for top-level comments\n                \"author\": comment[\"authorDisplayName\"],\n                \"author_channel_url\": comment.get(\"authorChannelUrl\", \"\"),\n                \"text\": comment[\"textDisplay\"],\n                \"published_at\": comment[\"publishedAt\"],\n                \"updated_at\": comment[\"updatedAt\"],\n                \"is_reply\": False,\n            }\n        ]\n\n        # Add metrics if requested\n        if include_metrics:\n            processed_comments[0].update(\n                {\n                    \"like_count\": comment[\"likeCount\"],\n                    \"reply_count\": item[\"snippet\"][\"totalReplyCount\"],\n                }\n            )\n\n        # Add replies if requested\n        if include_replies and item[\"snippet\"][\"totalReplyCount\"] > 0 and \"replies\" in item:\n            for reply in item[\"replies\"][\"comments\"]:\n                reply_data = self._process_reply(reply, parent_id=comment_id, include_metrics=include_metrics)\n                processed_comments.append(reply_data)\n\n        return processed_comments\n\n    @contextmanager\n    def youtube_client(self):\n        \"\"\"Context manager for YouTube API client.\"\"\"\n        client = build(\"youtube\", \"v3\", developerKey=self.api_key)\n        try:\n            yield client\n        finally:\n            client.close()\n\n    def get_video_comments(self) -> DataFrame:\n        \"\"\"Retrieves comments from a YouTube video and returns as DataFrame.\"\"\"\n        try:\n            # Extract video ID from URL\n            video_id = self._extract_video_id(self.video_url)\n\n            # Use context manager for YouTube API client\n            with self.youtube_client() as youtube:\n                comments_data = []\n                results_count = 0\n                request = youtube.commentThreads().list(\n                    part=\"snippet,replies\",\n                    videoId=video_id,\n                    maxResults=min(self.API_MAX_RESULTS, self.max_results),\n                    order=self.sort_by,\n                    textFormat=\"plainText\",\n                )\n\n                while request and results_count < self.max_results:\n                    response = request.execute()\n\n                    for item in response.get(\"items\", []):\n                        if results_count >= self.max_results:\n                            break\n\n                        comments = self._process_comment(\n                            item, include_metrics=self.include_metrics, include_replies=self.include_replies\n                        )\n                        comments_data.extend(comments)\n                        results_count += 1\n\n                    # Get the next page if available and needed\n                    if \"nextPageToken\" in response and results_count < self.max_results:\n                        request = youtube.commentThreads().list(\n                            part=\"snippet,replies\",\n                            videoId=video_id,\n                            maxResults=min(self.API_MAX_RESULTS, self.max_results - results_count),\n                            order=self.sort_by,\n                            textFormat=\"plainText\",\n                            pageToken=response[\"nextPageToken\"],\n                        )\n                    else:\n                        request = None\n\n                # Convert to DataFrame\n                comments_df = pd.DataFrame(comments_data)\n\n                # Add video metadata\n                comments_df[\"video_id\"] = video_id\n                comments_df[\"video_url\"] = self.video_url\n\n                # Sort columns for better organization\n                column_order = [\n                    \"video_id\",\n                    \"video_url\",\n                    \"comment_id\",\n                    \"parent_comment_id\",\n                    \"is_reply\",\n                    \"author\",\n                    \"author_channel_url\",\n                    \"text\",\n                    \"published_at\",\n                    \"updated_at\",\n                ]\n\n                if self.include_metrics:\n                    column_order.extend([\"like_count\", \"reply_count\"])\n\n                comments_df = comments_df[column_order]\n\n                return DataFrame(comments_df)\n\n        except HttpError as e:\n            error_message = f\"YouTube API error: {e!s}\"\n            if e.resp.status == self.COMMENTS_DISABLED_STATUS:\n                error_message = \"Comments are disabled for this video or API quota exceeded.\"\n            elif e.resp.status == self.NOT_FOUND_STATUS:\n                error_message = \"Video not found.\"\n\n            return DataFrame(pd.DataFrame({\"error\": [error_message]}))\n"
+                "value": "from contextlib import contextmanager\n\nimport pandas as pd\nfrom googleapiclient.discovery import build\nfrom googleapiclient.errors import HttpError\n\nfrom lfx.custom.custom_component.component import Component\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, IntInput, MessageTextInput, SecretStrInput\nfrom lfx.schema.dataframe import DataFrame\nfrom lfx.template.field.base import Output\n\n\nclass YouTubeCommentsComponent(Component):\n    \"\"\"A component that retrieves comments from YouTube videos.\"\"\"\n\n    display_name: str = \"YouTube Comments\"\n    description: str = \"Retrieves and analyzes comments from YouTube videos.\"\n    icon: str = \"YouTube\"\n\n    # Constants\n    COMMENTS_DISABLED_STATUS = 403\n    NOT_FOUND_STATUS = 404\n    API_MAX_RESULTS = 100\n\n    inputs = [\n        MessageTextInput(\n            name=\"video_url\",\n            display_name=\"Video URL\",\n            info=\"The URL of the YouTube video to get comments from.\",\n            tool_mode=True,\n            required=True,\n        ),\n        SecretStrInput(\n            name=\"api_key\",\n            display_name=\"YouTube API Key\",\n            info=\"Your YouTube Data API key.\",\n            required=True,\n        ),\n        IntInput(\n            name=\"max_results\",\n            display_name=\"Max Results\",\n            value=20,\n            info=\"The maximum number of comments to return.\",\n        ),\n        DropdownInput(\n            name=\"sort_by\",\n            display_name=\"Sort By\",\n            options=[\"time\", \"relevance\"],\n            value=\"relevance\",\n            info=\"Sort comments by time or relevance.\",\n        ),\n        BoolInput(\n            name=\"include_replies\",\n            display_name=\"Include Replies\",\n            value=False,\n            info=\"Whether to include replies to comments.\",\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"include_metrics\",\n            display_name=\"Include Metrics\",\n            value=True,\n            info=\"Include metrics like like count and reply count.\",\n            advanced=True,\n        ),\n    ]\n\n    outputs = [\n        Output(name=\"comments\", display_name=\"Comments\", method=\"get_video_comments\"),\n    ]\n\n    def _extract_video_id(self, video_url: str) -> str:\n        \"\"\"Extracts the video ID from a YouTube URL.\"\"\"\n        import re\n\n        patterns = [\n            r\"(?:youtube\\.com\\/watch\\?v=|youtu.be\\/|youtube.com\\/embed\\/)([^&\\n?#]+)\",\n            r\"youtube.com\\/shorts\\/([^&\\n?#]+)\",\n        ]\n\n        for pattern in patterns:\n            match = re.search(pattern, video_url)\n            if match:\n                return match.group(1)\n\n        return video_url.strip()\n\n    def _process_reply(self, reply: dict, parent_id: str, *, include_metrics: bool = True) -> dict:\n        \"\"\"Process a single reply comment.\"\"\"\n        reply_snippet = reply[\"snippet\"]\n        reply_data = {\n            \"comment_id\": reply[\"id\"],\n            \"parent_comment_id\": parent_id,\n            \"author\": reply_snippet[\"authorDisplayName\"],\n            \"text\": reply_snippet[\"textDisplay\"],\n            \"published_at\": reply_snippet[\"publishedAt\"],\n            \"is_reply\": True,\n        }\n        if include_metrics:\n            reply_data[\"like_count\"] = reply_snippet[\"likeCount\"]\n            reply_data[\"reply_count\"] = 0  # Replies can't have replies\n\n        return reply_data\n\n    def _process_comment(\n        self, item: dict, *, include_metrics: bool = True, include_replies: bool = False\n    ) -> list[dict]:\n        \"\"\"Process a single comment thread.\"\"\"\n        comment = item[\"snippet\"][\"topLevelComment\"][\"snippet\"]\n        comment_id = item[\"snippet\"][\"topLevelComment\"][\"id\"]\n\n        # Basic comment data\n        processed_comments = [\n            {\n                \"comment_id\": comment_id,\n                \"parent_comment_id\": \"\",  # Empty for top-level comments\n                \"author\": comment[\"authorDisplayName\"],\n                \"author_channel_url\": comment.get(\"authorChannelUrl\", \"\"),\n                \"text\": comment[\"textDisplay\"],\n                \"published_at\": comment[\"publishedAt\"],\n                \"updated_at\": comment[\"updatedAt\"],\n                \"is_reply\": False,\n            }\n        ]\n\n        # Add metrics if requested\n        if include_metrics:\n            processed_comments[0].update(\n                {\n                    \"like_count\": comment[\"likeCount\"],\n                    \"reply_count\": item[\"snippet\"][\"totalReplyCount\"],\n                }\n            )\n\n        # Add replies if requested\n        if include_replies and item[\"snippet\"][\"totalReplyCount\"] > 0 and \"replies\" in item:\n            for reply in item[\"replies\"][\"comments\"]:\n                reply_data = self._process_reply(reply, parent_id=comment_id, include_metrics=include_metrics)\n                processed_comments.append(reply_data)\n\n        return processed_comments\n\n    @contextmanager\n    def youtube_client(self):\n        \"\"\"Context manager for YouTube API client.\"\"\"\n        client = build(\"youtube\", \"v3\", developerKey=self.api_key)\n        try:\n            yield client\n        finally:\n            client.close()\n\n    def get_video_comments(self) -> DataFrame:\n        \"\"\"Retrieves comments from a YouTube video and returns as DataFrame.\"\"\"\n        try:\n            # Extract video ID from URL\n            video_id = self._extract_video_id(self.video_url)\n\n            # Use context manager for YouTube API client\n            with self.youtube_client() as youtube:\n                comments_data = []\n                results_count = 0\n                request = youtube.commentThreads().list(\n                    part=\"snippet,replies\",\n                    videoId=video_id,\n                    maxResults=min(self.API_MAX_RESULTS, self.max_results),\n                    order=self.sort_by,\n                    textFormat=\"plainText\",\n                )\n\n                while request and results_count < self.max_results:\n                    response = request.execute()\n\n                    for item in response.get(\"items\", []):\n                        if results_count >= self.max_results:\n                            break\n\n                        comments = self._process_comment(\n                            item, include_metrics=self.include_metrics, include_replies=self.include_replies\n                        )\n                        comments_data.extend(comments)\n                        results_count += 1\n\n                    # Get the next page if available and needed\n                    if \"nextPageToken\" in response and results_count < self.max_results:\n                        request = youtube.commentThreads().list(\n                            part=\"snippet,replies\",\n                            videoId=video_id,\n                            maxResults=min(self.API_MAX_RESULTS, self.max_results - results_count),\n                            order=self.sort_by,\n                            textFormat=\"plainText\",\n                            pageToken=response[\"nextPageToken\"],\n                        )\n                    else:\n                        request = None\n\n                # Define column order\n                column_order = [\n                    \"video_id\",\n                    \"video_url\",\n                    \"comment_id\",\n                    \"parent_comment_id\",\n                    \"is_reply\",\n                    \"author\",\n                    \"author_channel_url\",\n                    \"text\",\n                    \"published_at\",\n                    \"updated_at\",\n                ]\n\n                if self.include_metrics:\n                    column_order.extend([\"like_count\", \"reply_count\"])\n\n                # Handle empty comments case\n                if not comments_data:\n                    # Create empty DataFrame with proper columns\n                    comments_df = pd.DataFrame(columns=column_order)\n                else:\n                    # Convert to DataFrame\n                    comments_df = pd.DataFrame(comments_data)\n\n                    # Add video metadata\n                    comments_df[\"video_id\"] = video_id\n                    comments_df[\"video_url\"] = self.video_url\n\n                    # Reorder columns\n                    comments_df = comments_df[column_order]\n\n                return DataFrame(comments_df)\n\n        except HttpError as e:\n            error_message = f\"YouTube API error: {e!s}\"\n            if e.resp.status == self.COMMENTS_DISABLED_STATUS:\n                error_message = \"Comments are disabled for this video or API quota exceeded.\"\n            elif e.resp.status == self.NOT_FOUND_STATUS:\n                error_message = \"Video not found.\"\n\n            return DataFrame(pd.DataFrame({\"error\": [error_message]}))\n"
               },
               "include_metrics": {
                 "_input_type": "BoolInput",

diff --git a/src/lfx/src/lfx/_assets/component_index.json b/src/lfx/src/lfx/_assets/component_index.json
diff --git a/src/lfx/src/lfx/components/youtube/comments.py b/src/lfx/src/lfx/components/youtube/comments.py
@@ -193,14 +193,7 @@ def get_video_comments(self) -> DataFrame:
                     else:
                         request = None
 
-                # Convert to DataFrame
-                comments_df = pd.DataFrame(comments_data)
-
-                # Add video metadata
-                comments_df["video_id"] = video_id
-                comments_df["video_url"] = self.video_url
-
-                # Sort columns for better organization
+                # Define column order
                 column_order = [
                     "video_id",
                     "video_url",
@@ -217,7 +210,20 @@ def get_video_comments(self) -> DataFrame:
                 if self.include_metrics:
                     column_order.extend(["like_count", "reply_count"])
 
-                comments_df = comments_df[column_order]
+                # Handle empty comments case
+                if not comments_data:
+                    # Create empty DataFrame with proper columns
+                    comments_df = pd.DataFrame(columns=column_order)
+                else:
+                    # Convert to DataFrame
+                    comments_df = pd.DataFrame(comments_data)
+
+                    # Add video metadata
+                    comments_df["video_id"] = video_id
+                    comments_df["video_url"] = self.video_url
+
+                    # Reorder columns
+                    comments_df = comments_df[column_order]
 
                 return DataFrame(comments_df)